From 7cd6ca1d7902260b54528054d729f2a3b27e5a00 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 18 Mar 2021 17:07:34 +0000
Subject: arm64: vdso: Use GFP_KERNEL for allocating compat vdso and signal pages

There's no need to allocate the compat vDSO and signal pages using
GFP_ATOMIC allocations, so use GFP_KERNEL instead.

Signed-off-by: Will Deacon
Reviewed-by: Vincenzo Frascino
Link: https://lore.kernel.org/r/20210318170738.7756-2-will@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/vdso.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/kernel')

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index cee5d04ea9ad..2d057a4dc787 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -299,7 +299,7 @@ static int aarch32_alloc_kuser_vdso_page(void)
 	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
 		return 0;
 
-	vdso_page = get_zeroed_page(GFP_ATOMIC);
+	vdso_page = get_zeroed_page(GFP_KERNEL);
 	if (!vdso_page)
 		return -ENOMEM;
 
@@ -316,7 +316,7 @@ static int aarch32_alloc_sigpage(void)
 	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
 	unsigned long sigpage;
 
-	sigpage = get_zeroed_page(GFP_ATOMIC);
+	sigpage = get_zeroed_page(GFP_KERNEL);
 	if (!sigpage)
 		return -ENOMEM;
 
-- cgit v1.2.3

From e9be47eab1cdaf0a2a3c0af96a6a4be1cf9a95c1 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 18 Mar 2021 17:07:35 +0000
Subject: arm64: vdso: Remove redundant calls to flush_dcache_page()

flush_dcache_page() ensures that the 'PG_dcache_clean' flag for its
'page' argument is clear so that cache maintenance will be performed
if the page is mapped into userspace with execute permissions. Newly
allocated pages have this flag clear, so there is no need to call
flush_dcache_page() for the compat vdso or signal pages. Remove the
redundant calls.

Signed-off-by: Will Deacon
Reviewed-by: Vincenzo Frascino
Link: https://lore.kernel.org/r/20210318170738.7756-3-will@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/vdso.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/arm64/kernel')

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 2d057a4dc787..421411981dc3 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -306,7 +306,6 @@ static int aarch32_alloc_kuser_vdso_page(void)
 	memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
 	       kuser_sz);
 	aarch32_vectors_page = virt_to_page(vdso_page);
-	flush_dcache_page(aarch32_vectors_page);
 	return 0;
 }
 
@@ -322,7 +321,6 @@ static int aarch32_alloc_sigpage(void)
 
 	memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
 	aarch32_sig_page = virt_to_page(sigpage);
-	flush_dcache_page(aarch32_sig_page);
 	return 0;
 }
 
-- cgit v1.2.3

From 7adbf10e29c2323f5eb6d6bdd13050c70900b993 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 18 Mar 2021 17:07:36 +0000
Subject: arm64: compat: Allow signal page to be remapped

For compatibility with 32-bit Arm, allow the compat signal page to be
remapped via mremap().
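As a purely illustrative sketch (not part of this patch): a checkpoint/restore
style tool is the expected user of this change. The hypothetical 32-bit test
program below locates its own "[sigpage]" mapping and moves it with mremap();
the .mremap hook added below updates mm->context.sigpage so the kernel keeps
using the relocated trampoline for signal return. The program structure,
names and error handling are simplified assumptions.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned long start = 0, end = 0;
	char line[256];
	FILE *maps = fopen("/proc/self/maps", "r");

	if (!maps)
		return 1;
	while (fgets(line, sizeof(line), maps)) {
		if (strstr(line, "[sigpage]")) {
			sscanf(line, "%lx-%lx", &start, &end);
			break;
		}
	}
	fclose(maps);
	if (!start)
		return 1;

	/* Reserve a destination, then move the signal page onto it. */
	void *target = mmap(NULL, end - start, PROT_NONE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (target == MAP_FAILED)
		return 1;

	void *moved = mremap((void *)start, end - start, end - start,
			     MREMAP_MAYMOVE | MREMAP_FIXED, target);
	return moved == MAP_FAILED ? 1 : 0;
}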
Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-4-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 421411981dc3..16bf0b46fb70 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -271,6 +271,14 @@ enum aarch32_map { static struct page *aarch32_vectors_page __ro_after_init; static struct page *aarch32_sig_page __ro_after_init; +static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.sigpage = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ @@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, + .mremap = aarch32_sigpage_mremap, }, [AA32_MAP_VVAR] = { .name = "[vvar]", -- cgit v1.2.3 From 6e554abd07002405fd9175284a10729e2f54be43 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:38 +0000 Subject: arm64: compat: Poison the compat sigpage Commit 9c698bff66ab ("ARM: ensure the signal page contains defined contents") poisoned the unused portions of the signal page for 32-bit Arm. Implement the same poisoning for the compat signal page on arm64 rather than using __GFP_ZERO. Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-6-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 16bf0b46fb70..159b72a646ab 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -318,17 +318,20 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } +#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long sigpage; + __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); + void *sigpage; - sigpage = get_zeroed_page(GFP_KERNEL); + sigpage = (void *)__get_free_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; - memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); + memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); + memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); return 0; } -- cgit v1.2.3 From 338a743640e98d26baf4a1450473ddcfe0a0c025 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 11:56:25 +0000 Subject: arm64: don't use GENERIC_IRQ_MULTI_HANDLER In subsequent patches we want to allow irqchip drivers to register as FIQ handlers, with a set_handle_fiq() function. To keep the IRQ/FIQ paths similar, we want arm64 to provide both set_handle_irq() and set_handle_fiq(), rather than using GENERIC_IRQ_MULTI_HANDLER for the former. This patch adds an arm64-specific implementation of set_handle_irq(). There should be no functional change as a result of this patch. 
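For illustration only (the driver name and handler body are invented, not
taken from this series), a root interrupt-controller driver registers its
top-level handler through the new arch hook in the same way it did with
GENERIC_IRQ_MULTI_HANDLER:

#include <linux/init.h>
#include <linux/irq.h>

/* Hypothetical root irqchip: acknowledge the pending interrupt and dispatch it. */
static void example_handle_irq(struct pt_regs *regs)
{
	/*
	 * Read the controller's acknowledge register, look up the hwirq
	 * in the IRQ domain, run its Linux handler, then issue the EOI.
	 */
}

static int __init example_irqchip_init(void)
{
	/* Only one root handler may be installed; later calls fail with -EBUSY. */
	return set_handle_irq(example_handle_irq);
}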
Signed-off-by: Marc Zyngier [Mark: use a single handler pointer] Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/irq.h | 3 +++ arch/arm64/kernel/irq.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..e7d2405be71f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -110,7 +110,6 @@ config ARM64 select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI - select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..8391c6f6f746 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,9 @@ struct pt_regs; +int set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#define set_handle_irq set_handle_irq + static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index dfb1feab867d..ad63bd50fa7b 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -71,6 +71,17 @@ static void init_irq_stacks(void) } #endif +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; + +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return -EBUSY; + + handle_arch_irq = handle_irq; + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); -- cgit v1.2.3 From 8ff443cebffab22544b77a1f9fb3651a49bde300 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Mar 2021 11:56:26 +0000 Subject: arm64: irq: rework root IRQ handler registration If we accidentally unmask IRQs before we've registered a root IRQ handler, handle_arch_irq will be NULL, and the IRQ exception handler will branch to a bogus address. To make this easier to debug, this patch initialises handle_arch_irq to a default handler which will panic(), making such problems easier to debug. When we add support for FIQ handlers, we can follow the same approach. When we add support for a root FIQ handler, it's possible to have root IRQ handler without an root FIQ handler, and in theory the inverse is also possible. To permit this, and to keep the IRQ/FIQ registration logic similar, this patch removes the panic in the absence of a root IRQ controller. Instead, set_handle_irq() logs when a handler is registered, which is sufficient for debug purposes. 
Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/irq.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ad63bd50fa7b..2fe0b535de30 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -71,14 +71,20 @@ static void init_irq_stacks(void) } #endif -void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +static void default_handle_irq(struct pt_regs *regs) +{ + panic("IRQ taken without a root IRQ handler\n"); +} + +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { - if (handle_arch_irq) + if (handle_arch_irq != default_handle_irq) return -EBUSY; handle_arch_irq = handle_irq; + pr_info("Root IRQ handler: %ps\n", handle_irq); return 0; } @@ -87,8 +93,6 @@ void __init init_IRQ(void) init_irq_stacks(); init_irq_scs(); irqchip_init(); - if (!handle_arch_irq) - panic("No interrupt controller found."); if (system_uses_irq_prio_masking()) { /* -- cgit v1.2.3 From 9eb563cdabe1d583c262042d5d44cc256f644543 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 11:56:27 +0000 Subject: arm64: entry: factor irq triage logic into macros In subsequent patches we'll allow an FIQ handler to be registered, and FIQ exceptions will need to be triaged very similarly to IRQ exceptions. So that we can reuse the existing logic, this patch factors the IRQ triage logic out into macros that can be reused for FIQ. The macros are named to follow the elX_foo_handler scheme used by the C exception handlers. For consistency with other top-level exception handlers, the kernel_entry/kernel_exit logic is not moved into the macros. As FIQ will use a different C handler, this handler name is provided as an argument to the macros. There should be no functional change as a result of this patch. Signed-off-by: Marc Zyngier [Mark: rework macros, commit message, rebase before DAIF rework] Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 80 +++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 37 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..e235b0e4e468 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -491,8 +491,8 @@ tsk .req x28 // current thread_info /* * Interrupt handling. */ - .macro irq_handler - ldr_l x1, handle_arch_irq + .macro irq_handler, handler:req + ldr_l x1, \handler mov x0, sp irq_stack_entry blr x1 @@ -531,6 +531,45 @@ alternative_endif #endif .endm + .macro el1_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x1 + enable_da_f + + mov x0, sp + bl enter_el1_irq_or_nmi + + irq_handler \handler + +#ifdef CONFIG_PREEMPTION + ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA_F were cleared at start of handling. 
If anything is set in DAIF, + * we come back from an NMI, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl arm64_preempt_schedule_irq // irq en/disable is done inside +1: +#endif + + mov x0, sp + bl exit_el1_irq_or_nmi + .endm + + .macro el0_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x0 + user_exit_irqoff + enable_da_f + + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: + irq_handler \handler + .endm + .text /* @@ -660,32 +699,7 @@ SYM_CODE_END(el1_sync) .align 6 SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - - mov x0, sp - bl enter_el1_irq_or_nmi - - irq_handler - -#ifdef CONFIG_PREEMPTION - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - - mov x0, sp - bl exit_el1_irq_or_nmi - + el1_interrupt_handler handle_arch_irq kernel_exit 1 SYM_CODE_END(el1_irq) @@ -725,15 +739,7 @@ SYM_CODE_END(el0_error_compat) SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - user_exit_irqoff - enable_da_f - - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: - irq_handler - + el0_interrupt_handler handle_arch_irq b ret_to_user SYM_CODE_END(el0_irq) -- cgit v1.2.3 From f0098155d337cab638cf18e37a3e9257d653d481 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 15 Mar 2021 11:56:28 +0000 Subject: arm64: Always keep DAIF.[IF] in sync Apple SoCs (A11 and newer) have some interrupt sources hardwired to the FIQ line. We implement support for this by simply treating IRQs and FIQs the same way in the interrupt vectors. To support these systems, the FIQ mask bit needs to be kept in sync with the IRQ mask bit, so both kinds of exceptions are masked together. No other platforms should be delivering FIQ exceptions right now, and we already unmask FIQ in normal process context, so this should not have an effect on other systems - if spurious FIQs were arriving, they would already panic the kernel. 
Signed-off-by: Hector Martin Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/assembler.h | 8 ++++---- arch/arm64/include/asm/daifflags.h | 10 +++++----- arch/arm64/include/asm/irqflags.h | 16 +++++++--------- arch/arm64/kernel/entry.S | 12 +++++++----- arch/arm64/kernel/process.c | 2 +- arch/arm64/kernel/smp.c | 1 + 7 files changed, 26 insertions(+), 25 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 880b9054d75c..934b9be582d2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..b76a71e84b7c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -40,9 +40,9 @@ msr daif, \flags .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -50,7 +50,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 1c26d7baa67f..5eb7af9c4557 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,8 +13,8 @@ #include #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); @@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..b57b9b1e4344 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -12,15 +12,13 @@ /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. 
We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. */ /* @@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable", + "msr daifclr, #3 // arch_local_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e235b0e4e468..ce8d4dc416fb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -533,7 +533,7 @@ alternative_endif .macro el1_interrupt_handler, handler:req gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f + enable_da mov x0, sp bl enter_el1_irq_or_nmi @@ -544,8 +544,10 @@ alternative_endif ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count alternative_if ARM64_HAS_IRQ_PRIO_MASKING /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption + * DA were cleared at start of handling, and IF are cleared by + * the GIC irqchip driver using gic_arch_enable_irqs() for + * normal IRQs. 
If anything is set, it means we come back from + * an NMI instead of a normal IRQ, so skip preemption */ mrs x0, daif orr x24, x24, x0 @@ -562,7 +564,7 @@ alternative_else_nop_endif .macro el0_interrupt_handler, handler:req gic_prio_irq_setup pmr=x20, tmp=x0 user_exit_irqoff - enable_da_f + enable_da tbz x22, #55, 1f bl do_el0_irq_bp_hardening @@ -763,7 +765,7 @@ el0_error_naked: mov x0, sp mov x1, x25 bl do_serror - enable_da_f + enable_da b ret_to_user SYM_CODE_END(el0_error) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..a29028d3d46e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -84,7 +84,7 @@ static void noinstr __cpu_do_idle_irqprio(void) unsigned long daif_bits; daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT, daif); + write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); /* * Unmask PMR before going idle to make sure interrupts can diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 357590beaabb..dcd7041b2b07 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,6 +188,7 @@ static void init_gic_priority_masking(void) cpuflags = read_sysreg(daif); WARN_ON(!(cpuflags & PSR_I_BIT)); + WARN_ON(!(cpuflags & PSR_F_BIT)); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } -- cgit v1.2.3 From 3889ba70102ed8c609e42c1d3563c8c041ce0511 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Mar 2021 11:56:29 +0000 Subject: arm64: irq: allow FIQs to be handled On contemporary platforms we don't use FIQ, and treat any stray FIQ as a fatal event. However, some platforms have an interrupt controller wired to FIQ, and need to handle FIQ as part of regular operation. So that we can support both cases dynamically, this patch updates the FIQ exception handling code to operate the same way as the IRQ handling code, with its own handle_arch_fiq handler. Where a root FIQ handler is not registered, an unexpected FIQ exception will trigger the default FIQ handler, which will panic() as today. Where a root FIQ handler is registered, handling of the FIQ is deferred to that handler. As el0_fiq_invalid_compat is supplanted by el0_fiq, the former is removed. For !CONFIG_COMPAT builds we never expect to take an exception from AArch32 EL0, so we keep the common el0_fiq_invalid handler. 
Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irq.h | 1 + arch/arm64/kernel/entry.S | 30 +++++++++++++++++++++--------- arch/arm64/kernel/irq.c | 16 ++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8391c6f6f746..fac08e18bcd5 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -10,6 +10,7 @@ struct pt_regs; int set_handle_irq(void (*handle_irq)(struct pt_regs *)); #define set_handle_irq set_handle_irq +int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ce8d4dc416fb..a86f50de2c7b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -588,18 +588,18 @@ SYM_CODE_START(vectors) kernel_ventry 1, sync // Synchronous EL1h kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, fiq // FIQ EL1h kernel_ventry 1, error // Error EL1h kernel_ventry 0, sync // Synchronous 64-bit EL0 kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, fiq // FIQ 64-bit EL0 kernel_ventry 0, error // Error 64-bit EL0 #ifdef CONFIG_COMPAT kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 #else kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 @@ -665,12 +665,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid) inv_entry 0, BAD_ERROR SYM_CODE_END(el0_error_invalid) -#ifdef CONFIG_COMPAT -SYM_CODE_START_LOCAL(el0_fiq_invalid_compat) - inv_entry 0, BAD_FIQ, 32 -SYM_CODE_END(el0_fiq_invalid_compat) -#endif - SYM_CODE_START_LOCAL(el1_sync_invalid) inv_entry 1, BAD_SYNC SYM_CODE_END(el1_sync_invalid) @@ -705,6 +699,12 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_exit 1 SYM_CODE_END(el1_irq) +SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) + kernel_entry 1 + el1_interrupt_handler handle_arch_fiq + kernel_exit 1 +SYM_CODE_END(el1_fiq) + /* * EL0 mode handlers. 
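A matching sketch for the FIQ side (again a hypothetical driver, not taken
from this series): a controller whose timer or IPI sources are hardwired to
the FIQ line claims the new root FIQ handler just like the IRQ one:

#include <linux/init.h>
#include <linux/irq.h>

/* Hypothetical controller with interrupt sources wired to the FIQ line. */
static void example_handle_fiq(struct pt_regs *regs)
{
	/* Work out which FIQ source fired and dispatch it. */
}

static int __init example_fiq_controller_init(void)
{
	/* Fails with -EBUSY once a root FIQ handler has been registered. */
	return set_handle_fiq(example_handle_fiq);
}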
*/ @@ -731,6 +731,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) b el0_irq_naked SYM_CODE_END(el0_irq_compat) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) + kernel_entry 0, 32 + b el0_fiq_naked +SYM_CODE_END(el0_fiq_compat) + SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) kernel_entry 0, 32 b el0_error_naked @@ -745,6 +750,13 @@ el0_irq_naked: b ret_to_user SYM_CODE_END(el0_irq) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) + kernel_entry 0 +el0_fiq_naked: + el0_interrupt_handler handle_arch_fiq + b ret_to_user +SYM_CODE_END(el0_fiq) + SYM_CODE_START_LOCAL(el1_error) kernel_entry 1 mrs x1, esr_el1 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 2fe0b535de30..bda49430c9ea 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -76,7 +76,13 @@ static void default_handle_irq(struct pt_regs *regs) panic("IRQ taken without a root IRQ handler\n"); } +static void default_handle_fiq(struct pt_regs *regs) +{ + panic("FIQ taken without a root FIQ handler\n"); +} + void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; +void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq; int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { @@ -88,6 +94,16 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) return 0; } +int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *)) +{ + if (handle_arch_fiq != default_handle_fiq) + return -EBUSY; + + handle_arch_fiq = handle_fiq; + pr_info("Root FIQ handler: %ps\n", handle_fiq); + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); -- cgit v1.2.3 From 9eef29d8c31bdb8d6254b99e3dc741c75832fd6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 23 Mar 2021 18:12:01 +0000 Subject: arm64: entry: remove test_irqs_unmasked macro We haven't needed the test_irqs_unmasked macro since commit: 105fc3352077bba5 ("arm64: entry: move el1 irq/nmi logic to C") ... and as we convert more of the entry logic to C it is decreasingly likely we'll need it in future, so let's remove the unused macro. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Acked-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210323181201.18889-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..a82af88e8599 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -499,20 +499,6 @@ tsk .req x28 // current thread_info irq_stack_exit .endm -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * Set res to 0 if irqs were unmasked in interrupted context. - * Otherwise set res to non-0 value. - */ - .macro test_irqs_unmasked res:req, pmr:req -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - sub \res, \pmr, #GIC_PRIO_IRQON -alternative_else - mov \res, xzr -alternative_endif - .endm -#endif - .macro gic_prio_kentry_setup, tmp:req #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if ARM64_HAS_IRQ_PRIO_MASKING -- cgit v1.2.3 From 18107f8a2df6bf1c6cac8d0713f757f866d5af51 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 12 Mar 2021 17:38:10 +0000 Subject: arm64: Support execute-only permissions with Enhanced PAN Enhanced Privileged Access Never (EPAN) allows Privileged Access Never to be used with Execute-only mappings. 
Absence of such support was a reason for 24cecc377463 ("arm64: Revert support for execute-only user mappings"). Thus now it can be revisited and re-enabled. Cc: Kees Cook Signed-off-by: Vladimir Murzin Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210312173811.58284-2-vladimir.murzin@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 17 +++++++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/pgtable-prot.h | 5 +++-- arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++++++++++-------- arch/arm64/include/asm/sysreg.h | 3 ++- arch/arm64/kernel/cpufeature.c | 12 ++++++++++++ arch/arm64/mm/fault.c | 18 +++++++++++++++++- mm/mmap.c | 6 ++++++ 8 files changed, 82 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..c4d7bafcea87 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1058,6 +1058,9 @@ config SYS_SUPPORTS_HUGETLBFS config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_FILTER_PGPROT + def_bool y + config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y if PGTABLE_LEVELS > 2 @@ -1681,6 +1684,20 @@ config ARM64_MTE endmenu +menu "ARMv8.7 architectural features" + +config ARM64_EPAN + bool "Enable support for Enhanced Privileged Access Never (EPAN)" + default y + depends on ARM64_PAN + help + Enhanced Privileged Access Never (EPAN) allows Privileged + Access Never to be used with Execute-only mappings. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..9e3ec4dd56d8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_HAS_EPAN 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..fab2f573f7a4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings; #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 47027796c2f9..0b10204e72fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -113,11 +113,12 @@ extern unsigned long 
empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been @@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check). PROT_NONE mappings do not have the PTE_VALID bit - * set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. */ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) + return prot; + + if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) + return prot; + + return PAGE_READONLY_EXEC; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..94291c7c6443 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -597,6 +597,7 @@ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. 
*/ +#define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 @@ -637,7 +638,7 @@ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ - SCTLR_EL1_RES1) + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..2ab04967dca7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1821,6 +1821,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_EPAN + { + .desc = "Enhanced Privileged Access Never", + .capability = ARM64_HAS_EPAN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 3, + }, +#endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f37d4e3830b7..871c82ab0a30 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault; - unsigned long vm_flags = VM_ACCESS_FLAGS; + unsigned long vm_flags; unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); @@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; + /* + * vm_flags tells us what bits we must have in vma->vm_flags + * for the fault to be benign, __do_page_fault() would check + * vma->vm_flags & vm_flags and returns an error if the + * intersection is empty + */ if (is_el0_instruction_abort(esr)) { + /* It was exec fault */ vm_flags = VM_EXEC; mm_flags |= FAULT_FLAG_INSTRUCTION; } else if (is_write_abort(esr)) { + /* It was write fault */ vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; + } else { + /* It was read fault */ + vm_flags = VM_READ; + /* Write implies read */ + vm_flags |= VM_WRITE; + /* If EPAN is absent then exec implies read */ + if (!cpus_have_const_cap(ARM64_HAS_EPAN)) + vm_flags |= VM_EXEC; } if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { diff --git a/mm/mmap.c b/mm/mmap.c index 3f287599a7a3..1d96a21acb2f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -93,6 +93,12 @@ static void unmap_region(struct mm_struct *mm, * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and + * MAP_PRIVATE (with Enhanced PAN supported): + * r: (no) no + * w: (no) no + * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, -- cgit v1.2.3 From b07f3499661c61f03478c99ff3fcb2381ddb9e38 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 19 Mar 2021 17:40:22 +0000 Subject: arm64: stacktrace: Move start_backtrace() out of the header Currently start_backtrace() is a static inline function in the header. 
Since it really shouldn't be sufficiently performance critical that we actually need to have it inlined move it into a C file, this will save anyone else scratching their head about why it is defined in the header. As far as I can see it's only there because it was factored out of the various callers. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210319174022.33051-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 24 ++---------------------- arch/arm64/kernel/stacktrace.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 22 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index eb29b1fe8255..4b33ca620679 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk, return false; } -static inline void start_backtrace(struct stackframe *frame, - unsigned long fp, unsigned long pc) -{ - frame->fp = fp; - frame->pc = pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame->graph = 0; -#endif - - /* - * Prime the first unwind. - * - * In unwind_frame() we'll check that the FP points to a valid stack, - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be - * treated as a transition to whichever stack that happens to be. The - * prev_fp value won't be used, but we set it to 0 such that it is - * definitely not an accessible stack address. - */ - bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); - frame->prev_fp = 0; - frame->prev_type = STACK_TYPE_UNKNOWN; -} +void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ad20981dfda4..2a1a7a281d34 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -32,6 +32,30 @@ * add sp, sp, #0x10 */ + +void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) +{ + frame->fp = fp; + frame->pc = pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame->graph = 0; +#endif + + /* + * Prime the first unwind. + * + * In unwind_frame() we'll check that the FP points to a valid stack, + * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be + * treated as a transition to whichever stack that happens to be. The + * prev_fp value won't be used, but we set it to 0 such that it is + * definitely not an accessible stack address. + */ + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; +} + /* * Unwind from one frame record (A) to the next frame record (B). * -- cgit v1.2.3 From 31d02e7ab00873befd2cfb6e44581490d947c38b Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:21 +0800 Subject: arm64: kaslr: support randomized module area with KASAN_VMALLOC After KASAN_VMALLOC works in arm64, we can randomize module region into vmalloc area now. Test: VMALLOC area ffffffc010000000 fffffffdf0000000 before the patch: module_alloc_base/end ffffffc008b80000 ffffffc010000000 after the patch: module_alloc_base/end ffffffdcf4bed000 ffffffc010000000 And the function that insmod some modules is fine. 
Suggested-by: Ard Biesheuvel Signed-off-by: Lecopzer Chen Link: https://lore.kernel.org/r/20210324040522.15548-5-lecopzer.chen@mediatek.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 18 ++++++++++-------- arch/arm64/kernel/module.c | 16 +++++++++------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 27f8939deb1b..341342b207f6 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && + (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS))) /* - * KASAN does not expect the module region to intersect the - * vmalloc region, since shadow memory is allocated for each - * module at load time, whereas the vmalloc region is shadowed - * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled, and put the kernel well within - * 4 GB of the module region. + * KASAN without KASAN_VMALLOC does not expect the module region + * to intersect the vmalloc region, since shadow memory is + * allocated for each module at load time, whereas the vmalloc + * region is shadowed by KASAN zero pages. So keep modules + * out of the vmalloc region if KASAN is enabled without + * KASAN_VMALLOC, and put the kernel well within 4 GB of the + * module region. */ return offset % SZ_2G; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fe21e0f06492..b5ec010c481f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -40,14 +40,16 @@ void *module_alloc(unsigned long size) NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + (IS_ENABLED(CONFIG_KASAN_VMALLOC) || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) /* - * KASAN can only deal with module allocations being served - * from the reserved module region, since the remainder of - * the vmalloc region is already backed by zero shadow pages, - * and punching holes into it is non-trivial. Since the module - * region is not randomized when KASAN is enabled, it is even + * KASAN without KASAN_VMALLOC can only deal with module + * allocations being served from the reserved module region, + * since the remainder of the vmalloc region is already + * backed by zero shadow pages, and punching holes into it + * is non-trivial. Since the module region is not randomized + * when KASAN is enabled without KASAN_VMALLOC, it is even * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ -- cgit v1.2.3 From 2c2e21e78a945b174629944281997bbcb839e6bb Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 1 Apr 2021 19:16:41 +0800 Subject: arm64: perf: Remove redundant initialization in perf_event.c The initialization of value in function armv8pmu_read_hw_counter() and armv8pmu_read_counter() seem redundant, as they are soon updated. So, We can remove them. 
Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1617275801-1980-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4658fcf88c2b..f594957e29bd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx) static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) { int idx = event->hw.idx; - u64 val = 0; + u64 val = armv8pmu_read_evcntr(idx); - val = armv8pmu_read_evcntr(idx); if (armv8pmu_event_is_chained(event)) val = (val << 32) | armv8pmu_read_evcntr(idx - 1); return val; @@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u64 value = 0; + u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); -- cgit v1.2.3 From cccb78ce89c45a4414db712be4986edfb92434bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 12 Mar 2021 19:03:13 +0000 Subject: arm64/sve: Rework SVE access trap to convert state in registers When we enable SVE usage in userspace after taking a SVE access trap we need to ensure that the portions of the register state that are not shared with the FPSIMD registers are zeroed. Currently we do this by forcing the FPSIMD registers to be saved to the task struct and converting them there. This is wasteful in the common case where the task state is loaded into the registers and we will immediately return to userspace since we can initialise the SVE state directly in registers instead of accessing multiple copies of the register state in memory. Instead in that common case do the conversion in the registers and update the task metadata so that we can return to userspace without spilling the register state to memory unless there is some other reason to do so. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210312190313.24598-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/kernel/entry-fpsimd.S | 5 +++++ arch/arm64/kernel/fpsimd.c | 26 +++++++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622a..ebb263b2d3b1 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -73,6 +73,7 @@ extern void sve_flush_live(void); extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); +extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 2ca395c25448..3ecec60d3295 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -48,6 +48,11 @@ SYM_FUNC_START(sve_get_vl) ret SYM_FUNC_END(sve_get_vl) +SYM_FUNC_START(sve_set_vq) + sve_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sve_set_vq) + /* * Load SVE state from FPSIMD state. 
* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 062b21f30f94..0f58e45bd3d1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -926,9 +926,8 @@ void fpsimd_release_task(struct task_struct *dead_task) * Trapped SVE access * * Storage is allocated for the full SVE state, the current FPSIMD - * register contents are migrated across, and TIF_SVE is set so that - * the SVE access trap will be disabled the next time this task - * reaches ret_to_user. + * register contents are migrated across, and the access trap is + * disabled. * * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during @@ -946,15 +945,24 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) get_cpu_fpsimd_context(); - fpsimd_save(); - - /* Force ret_to_user to reload the registers: */ - fpsimd_flush_task_state(current); - - fpsimd_to_sve(current); if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); + sve_flush_live(); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } + put_cpu_fpsimd_context(); } -- cgit v1.2.3 From a7dcf58ae5d2f7c6f1bbe13290897f539f9cd75b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 13:54:49 +0800 Subject: arm64: Add __init section marker to some functions They are not needed after booting, so mark them as __init to move them to the .init section. 
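As a small, generic illustration (not from this patch): functions marked
__init are placed in .init.text and the memory backing them is released once
boot completes, so the annotation is only safe for code that cannot be
reached after initialisation, for example something invoked from an initcall:

#include <linux/init.h>

/* Hypothetical one-shot setup; its text is discarded after boot. */
static int __init example_boot_setup(void)
{
	return 0;
}
early_initcall(example_boot_setup);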
Signed-off-by: Jisheng Zhang Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20210330135449.4dcffd7f@xhacker.debian Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptdump.h | 2 +- arch/arm64/kernel/vdso.c | 4 ++-- arch/arm64/mm/ptdump.c | 4 ++-- arch/arm64/mm/ptdump_debugfs.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 38187f74e089..b1dd7ecff7ef 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,7 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS -void ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index cee5d04ea9ad..d1fa288518a7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -86,7 +86,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static int __vdso_init(enum vdso_abi abi) +static int __init __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; @@ -326,7 +326,7 @@ static int aarch32_alloc_sigpage(void) return 0; } -static int __aarch32_alloc_vdso_pages(void) +static int __init __aarch32_alloc_vdso_pages(void) { if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 0e050d76b83a..a50e92ea1878 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -337,7 +337,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } -static void ptdump_initialize(void) +static void __init ptdump_initialize(void) { unsigned i, j; @@ -381,7 +381,7 @@ void ptdump_check_wx(void) pr_info("Checked W+X mappings: passed, no W+X pages found\n"); } -static int ptdump_init(void) +static int __init ptdump_init(void) { address_markers[PAGE_END_NR].start_address = PAGE_END; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index d29d722ec3ec..68bf1a125502 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -16,7 +16,7 @@ static int ptdump_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(ptdump); -void ptdump_debugfs_register(struct ptdump_info *info, const char *name) +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name) { debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); } -- cgit v1.2.3 From cac642c12a805ae7565a263b59fb94ad19e81952 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:08 +0100 Subject: arm64: cpufeature: Allow early filtering of feature override Some CPUs are broken enough that some overrides need to be rejected at the earliest opportunity. In some cases, that's right at cpu feature override time. Provide the necessary infrastructure to filter out overrides, and to report such filtered out overrides to the core cpufeature code. 
Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210408131010.1109027-2-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 17 +++++++++++++++++ arch/arm64/kernel/cpufeature.c | 6 ++++++ arch/arm64/kernel/idreg-override.c | 13 +++++++++++++ 3 files changed, 36 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..338840c00e8e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,23 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +/* + * Describe the early feature override to the core override code: + * + * @val Values that are to be merged into the final + * sanitised value of the register. Only the bitfields + * set to 1 in @mask are valid + * @mask Mask of the features that are overridden by @val + * + * A @mask field set to full-1 indicates that the corresponding field + * in @val is a valid override. + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-0 denotes that this field has no override + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-1 denotes thath this field has an invalid override. + */ struct arm64_ftr_override { u64 val; u64 mask; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..6de15deaa912 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -809,6 +809,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift, str, tmp); + } else if ((ftr_mask & reg->override->val) == ftr_mask) { + reg->override->val &= ~ftr_mask; + pr_warn("%s[%d:%d]: impossible override, ignored\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift); } val = arm64_ftr_set_value(ftrp, val, ftr_new); diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 83f1c4b92095..be92fcd319a1 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -25,6 +25,7 @@ struct ftr_set_desc { struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + bool (*filter)(u64 val); } fields[]; }; @@ -124,6 +125,18 @@ static void __init match_options(const char *cmdline) if (find_field(cmdline, regs[i], f, &v)) continue; + /* + * If an override gets filtered out, advertise + * it by setting the value to 0xf, but + * clearing the mask... Yes, this is fragile. + */ + if (regs[i]->fields[f].filter && + !regs[i]->fields[f].filter(v)) { + regs[i]->override->val |= mask; + regs[i]->override->mask &= ~mask; + continue; + } + regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; -- cgit v1.2.3 From 31a32b49b80f79cbb84a9c948c5609c6fc044443 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:09 +0100 Subject: arm64: Cope with CPUs stuck in VHE mode It seems that the CPUs part of the SoC known as Apple M1 have the terrible habit of being stuck with HCR_EL2.E2H==1, in violation of the architecture. Try and work around this deplorable state of affairs by detecting the stuck bit early and short-circuit the nVHE dance. Additional filtering code ensures that attempts at switching to nVHE from the command-line are also ignored. It is still unknown whether there are many more such nuggets to be found... 
Reported-by: Hector Martin Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210408131010.1109027-3-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 39 +++++++++++++++++++++++++++++++++++--- arch/arm64/kernel/hyp-stub.S | 8 ++++---- arch/arm64/kernel/idreg-override.c | 13 ++++++++++++- 3 files changed, 52 insertions(+), 8 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 840bda1869e9..96873dfa67fd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + /* + * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, + * making it impossible to start in nVHE mode. Is that + * compliant with the architecture? Absolutely not! + */ + mrs x0, hcr_el2 + and x0, x0, #HCR_E2H + cbz x0, 1f + + /* Switching to VHE requires a sane SCTLR_EL1 as a start */ + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + /* + * Force an eret into a helper "function", and let it return + * to our original caller... This makes sure that we have + * initialised the basic PSTATE state. + */ + mov x0, #INIT_PSTATE_EL2 + msr spsr_el1, x0 + adr x0, __cpu_stick_to_vhe + msr elr_el1, x0 + eret + +1: + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret + +__cpu_stick_to_vhe: + mov x0, #HVC_VHE_RESTART + hvc #0 + mov x0, #BOOT_CPU_MODE_EL2 + ret SYM_FUNC_END(init_kernel_el) /* diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 5eccbd62fec8..9c9b4d8fc395 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors) ventry el2_fiq_invalid // FIQ EL2t ventry el2_error_invalid // Error EL2t - ventry el2_sync_invalid // Synchronous EL2h + ventry elx_sync // Synchronous EL2h ventry el2_irq_invalid // IRQ EL2h ventry el2_fiq_invalid // FIQ EL2h ventry el2_error_invalid // Error EL2h - ventry el1_sync // Synchronous 64-bit EL1 + ventry elx_sync // Synchronous 64-bit EL1 ventry el1_irq_invalid // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 ventry el1_error_invalid // Error 64-bit EL1 @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 -SYM_CODE_START_LOCAL(el1_sync) +SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync) 9: mov x0, xzr eret -SYM_CODE_END(el1_sync) +SYM_CODE_END(elx_sync) // nVHE? No way! Give me the real thing! SYM_CODE_START_LOCAL(mutate_to_vhe) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index be92fcd319a1..e628c8ce1ffe 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -29,11 +29,22 @@ struct ftr_set_desc { } fields[]; }; +static bool __init mmfr1_vh_filter(u64 val) +{ + /* + * If we ever reach this point while running VHE, we're + * guaranteed to be on one of these funky, VHE-stuck CPUs. If + * the user was trying to force nVHE on us, proceed with + * attitude adjustment. 
+ */ + return !(is_kernel_in_hyp_mode() && val == 0); +} + static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT }, + { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, {} }, }; -- cgit v1.2.3 From 2d726d0db6ac479d91bf74490455badd34af6b1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:10 +0100 Subject: arm64: Get rid of CONFIG_ARM64_VHE CONFIG_ARM64_VHE was introduced with ARMv8.1 (some 7 years ago), and has been enabled by default for almost all that time. Given that newer systems that are VHE capable are finally becoming available, and that some systems are even incapable of not running VHE, drop the configuration altogether. Anyone willing to stick to non-VHE on VHE hardware for obscure reasons should use the 'kvm-arm.mode=nvhe' command-line option. Suggested-by: Will Deacon Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210408131010.1109027-4-maz@kernel.org Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 3 +-- arch/arm64/Kconfig | 20 -------------------- arch/arm64/kernel/cpufeature.c | 4 ---- arch/arm64/kernel/hyp-stub.S | 2 -- 4 files changed, 1 insertion(+), 28 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 04545725f187..18f8bb3024f4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2279,8 +2279,7 @@ state is kept private from the host. Not valid if the kernel is running in EL2. - Defaults to VHE/nVHE based on hardware support and - the value of CONFIG_ARM64_VHE. + Defaults to VHE/nVHE based on hardware support. kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..e9fbb0b45793 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1416,19 +1416,6 @@ config ARM64_USE_LSE_ATOMICS built with binutils >= 2.25 in order for the new instructions to be used. -config ARM64_VHE - bool "Enable support for Virtualization Host Extensions (VHE)" - default y - help - Virtualization Host Extensions (VHE) allow the kernel to run - directly at EL2 (instead of EL1) on processors that support - it. This leads to better performance for KVM, as they reduce - the cost of the world switch. - - Selecting this option allows the VHE feature to be detected - at runtime, and does not affect processors that do not - implement this feature. - endmenu menu "ARMv8.2 architectural features" @@ -1684,7 +1671,6 @@ endmenu config ARM64_SVE bool "ARM Scalable Vector Extension support" default y - depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1713,12 +1699,6 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. - CPUs that support SVE are architecturally required to support the - Virtualization Host Extensions (VHE), so the kernel makes no - provision for supporting SVE alongside KVM without VHE enabled. - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support - KVM in the same kernel image. 
- config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6de15deaa912..fa6023ac3548 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1623,7 +1623,6 @@ int get_cpu_with_amu_feat(void) } #endif -#ifdef CONFIG_ARM64_VHE static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); @@ -1642,7 +1641,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -#endif static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) { @@ -1845,7 +1843,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -1853,7 +1850,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, -#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9c9b4d8fc395..74ad3db061d1 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors) * Entry point to switch to VHE if deemed capable */ SYM_FUNC_START(switch_to_vhe) -#ifdef CONFIG_ARM64_VHE // Need to have booted at EL2 adr_l x1, __boot_cpu_mode ldr w0, [x1] @@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe) mov x0, #HVC_VHE_RESTART hvc #0 1: -#endif ret SYM_FUNC_END(switch_to_vhe) -- cgit v1.2.3 From f3b7deef8dcaf84fd659108ae300626ea5420f87 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:11 +0000 Subject: arm64: mte: Add asynchronous mode support MTE provides an asynchronous mode for detecting tag exceptions. In particular instead of triggering a fault the arm64 core updates a register which is checked by the kernel after the asynchronous tag check fault has occurred. Add support for MTE asynchronous mode. The exception handling mechanism will be added with a future patch. Note: KASAN HW activates async mode via kasan.mode kernel parameter. The default mode is set to synchronous. The code that verifies the status of TFSR_EL1 will be added with a future patch. 
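For instance, a caller is expected to select the mode through the new helpers rather than poking SCTLR_EL1 directly (a sketch only; kasan_hw_tags_async_requested() is a made-up placeholder for however KASAN ends up choosing the mode):

	/* Illustrative mode selection */
	if (kasan_hw_tags_async_requested())
		mte_enable_kernel_async();	/* SCTLR_EL1.TCF = asynchronous */
	else
		mte_enable_kernel_sync();	/* SCTLR_EL1.TCF = synchronous */
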
Cc: Will Deacon Reviewed-by: Catalin Marinas Reviewed-by: Andrey Konovalov Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-2-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 4 +++- arch/arm64/include/asm/mte-kasan.h | 9 +++++++-- arch/arm64/kernel/mte.c | 16 ++++++++++++++-- 3 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0aabc3be9a75..8fbc8dab044f 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,7 +243,9 @@ static inline const void *__tag_set(const void *addr, u8 tag) } #ifdef CONFIG_KASAN_HW_TAGS -#define arch_enable_tagging() mte_enable_kernel() +#define arch_enable_tagging_sync() mte_enable_kernel_sync() +#define arch_enable_tagging_async() mte_enable_kernel_async() +#define arch_enable_tagging() arch_enable_tagging_sync() #define arch_set_tagging_report_once(state) mte_set_report_once(state) #define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 7ab500e2ad17..4acf8bf41cad 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -77,7 +77,8 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) } while (curr != end); } -void mte_enable_kernel(void); +void mte_enable_kernel_sync(void); +void mte_enable_kernel_async(void); void mte_init_tags(u64 max_tag); void mte_set_report_once(bool state); @@ -104,7 +105,11 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) { } -static inline void mte_enable_kernel(void) +static inline void mte_enable_kernel_sync(void) +{ +} + +static inline void mte_enable_kernel_async(void) { } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b3c70a612c7a..fa755cf94e01 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -107,11 +107,23 @@ void mte_init_tags(u64 max_tag) write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1); } -void mte_enable_kernel(void) +static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. */ - sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC); + sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf); isb(); + + pr_info_once("MTE: enabled in %s mode at EL1\n", mode); +} + +void mte_enable_kernel_sync(void) +{ + __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); +} + +void mte_enable_kernel_async(void) +{ + __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC); } void mte_set_report_once(bool state) -- cgit v1.2.3 From e60beb95c08baf29416d0e06a9e1d4887faf5d1c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:15 +0000 Subject: arm64: mte: Enable TCO in functions that can read beyond buffer limits load_unaligned_zeropad() and __get/put_kernel_nofault() functions can read past some buffer limits which may include some MTE granule with a different tag. When MTE async mode is enabled, the load operation crosses the boundaries and the next granule has a different tag the PE sets the TFSR_EL1.TF1 bit as if an asynchronous tag fault is happened. Enable Tag Check Override (TCO) in these functions before the load and disable it afterwards to prevent this to happen. 
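The intended pattern is roughly the following (an illustrative sketch; overreaching_load() is a placeholder for load_unaligned_zeropad() or the __get/put_kernel_nofault() accessors patched below):

	__uaccess_enable_tco_async();	/* mask tag checks, async mode only */
	ret = overreaching_load(addr);	/* may cross into a mismatching granule */
	__uaccess_disable_tco_async();
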
Note: The same condition can be hit in MTE sync mode but we deal with it through the exception handling. In the current implementation, mte_async_mode flag is set only at boot time but in future kasan might acquire some runtime features that that change the mode dynamically, hence we disable it when sync mode is selected for future proof. Cc: Will Deacon Reported-by: Branislav Rankov Tested-by: Branislav Rankov Reviewed-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-6-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 15 +++++++++++++++ arch/arm64/include/asm/uaccess.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/word-at-a-time.h | 4 ++++ arch/arm64/kernel/mte.c | 22 ++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9b557a457f24..8603c6636a7d 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -90,5 +90,20 @@ static inline void mte_assign_mem_tag_range(void *addr, size_t size) #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_KASAN_HW_TAGS +/* Whether the MTE asynchronous mode is enabled. */ +DECLARE_STATIC_KEY_FALSE(mte_async_mode); + +static inline bool system_uses_mte_async_mode(void) +{ + return static_branch_unlikely(&mte_async_mode); +} +#else +static inline bool system_uses_mte_async_mode(void) +{ + return false; +} +#endif /* CONFIG_KASAN_HW_TAGS */ + #endif /* __ASSEMBLY__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 0deb88467111..b5f08621fa29 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -188,6 +189,23 @@ static inline void __uaccess_enable_tco(void) ARM64_MTE, CONFIG_KASAN_HW_TAGS)); } +/* + * These functions disable tag checking only if in MTE async mode + * since the sync mode generates exceptions synchronously and the + * nofault or load_unaligned_zeropad can handle them. 
+ */ +static inline void __uaccess_disable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_disable_tco(); +} + +static inline void __uaccess_enable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_enable_tco(); +} + static inline void uaccess_disable_privileged(void) { __uaccess_disable_tco(); @@ -307,8 +325,10 @@ do { \ do { \ int __gkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(dst)), \ (__force type *)(src), __gkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) @@ -380,8 +400,10 @@ do { \ do { \ int __pkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(src)), \ (__force type *)(dst), __pkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__pkn_err)) \ goto err_label; \ } while(0) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b5909..c62d9fa791aa 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -55,6 +55,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long ret, offset; + __uaccess_enable_tco_async(); + /* Load word from unaligned pointer addr */ asm( "1: ldr %0, %3\n" @@ -76,6 +78,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) : "=&r" (ret), "=&r" (offset) : "r" (addr), "Q" (*(unsigned long *)addr)); + __uaccess_disable_tco_async(); + return ret; } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index fa755cf94e01..9362928ba0d5 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,6 +26,10 @@ u64 gcr_kernel_excl __ro_after_init; static bool report_fault_once = true; +/* Whether the MTE asynchronous mode is enabled. */ +DEFINE_STATIC_KEY_FALSE(mte_async_mode); +EXPORT_SYMBOL_GPL(mte_async_mode); + static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) { pte_t old_pte = READ_ONCE(*ptep); @@ -118,12 +122,30 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) void mte_enable_kernel_sync(void) { + /* + * Make sure we enter this function when no PE has set + * async mode previously. + */ + WARN_ONCE(system_uses_mte_async_mode(), + "MTE async mode enabled system wide!"); + __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); } void mte_enable_kernel_async(void) { __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC); + + /* + * MTE async mode is set system wide by the first PE that + * executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_mode()) + static_branch_enable(&mte_async_mode); } void mte_set_report_once(bool state) -- cgit v1.2.3 From d8969752cc4e3294074ff0582de42d0e3c982eba Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:16 +0000 Subject: arm64: mte: Conditionally compile mte_enable_kernel_*() mte_enable_kernel_*() are not needed if KASAN_HW is disabled. Add ash defines around the functions to conditionally compile the functions. 
Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210315132019.33202-7-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/mte.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 9362928ba0d5..50f0724c8d8f 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,9 +26,11 @@ u64 gcr_kernel_excl __ro_after_init; static bool report_fault_once = true; +#ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ DEFINE_STATIC_KEY_FALSE(mte_async_mode); EXPORT_SYMBOL_GPL(mte_async_mode); +#endif static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) { @@ -120,6 +122,7 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) pr_info_once("MTE: enabled in %s mode at EL1\n", mode); } +#ifdef CONFIG_KASAN_HW_TAGS void mte_enable_kernel_sync(void) { /* @@ -147,6 +150,7 @@ void mte_enable_kernel_async(void) if (!system_uses_mte_async_mode()) static_branch_enable(&mte_async_mode); } +#endif void mte_set_report_once(bool state) { -- cgit v1.2.3 From 65812c6921cc849d86811147038dd246fa0ea18c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:17 +0000 Subject: arm64: mte: Enable async tag check fault MTE provides a mode that asynchronously updates the TFSR_EL1 register when a tag check exception is detected. To take advantage of this mode the kernel has to verify the status of the register at: 1. Context switching 2. Return to user/EL0 (Not required in entry from EL0 since the kernel did not run) 3. Kernel entry from EL1 4. Kernel exit to EL1 If the register is non-zero a trace is reported. Add the required features for EL1 detection and reporting. Note: ITFSB bit is set in the SCTLR_EL1 register hence it guaranties that the indirect writes to TFSR_EL1 are synchronized at exception entry to EL1. On the context switch path the synchronization is guarantied by the dsb() in __switch_to(). The dsb(nsh) in mte_check_tfsr_exit() is provisional pending confirmation by the architects. Cc: Will Deacon Reviewed-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-8-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 29 +++++++++++++++++++++++++++++ arch/arm64/kernel/entry-common.c | 6 ++++++ arch/arm64/kernel/mte.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 8603c6636a7d..9a929620ca5d 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -98,11 +98,40 @@ static inline bool system_uses_mte_async_mode(void) { return static_branch_unlikely(&mte_async_mode); } + +void mte_check_tfsr_el1(void); + +static inline void mte_check_tfsr_entry(void) +{ + mte_check_tfsr_el1(); +} + +static inline void mte_check_tfsr_exit(void) +{ + /* + * The asynchronous faults are sync'ed automatically with + * TFSR_EL1 on kernel entry but for exit an explicit dsb() + * is required. 
+ */ + dsb(nsh); + isb(); + + mte_check_tfsr_el1(); +} #else static inline bool system_uses_mte_async_mode(void) { return false; } +static inline void mte_check_tfsr_el1(void) +{ +} +static inline void mte_check_tfsr_entry(void) +{ +} +static inline void mte_check_tfsr_exit(void) +{ +} #endif /* CONFIG_KASAN_HW_TAGS */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9d3588450473..a1ec351c36bd 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -37,6 +37,8 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter_check_tick(); trace_hardirqs_off_finish(); + + mte_check_tfsr_entry(); } /* @@ -47,6 +49,8 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) { lockdep_assert_irqs_disabled(); + mte_check_tfsr_exit(); + if (interrupts_enabled(regs)) { if (regs->exit_rcu) { trace_hardirqs_on_prepare(); @@ -293,6 +297,8 @@ asmlinkage void noinstr enter_from_user_mode(void) asmlinkage void noinstr exit_to_user_mode(void) { + mte_check_tfsr_exit(); + trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(CALLER_ADDR0); user_enter_irqoff(); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 50f0724c8d8f..b6336fbe4c14 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -162,6 +162,29 @@ bool mte_report_once(void) return READ_ONCE(report_fault_once); } +#ifdef CONFIG_KASAN_HW_TAGS +void mte_check_tfsr_el1(void) +{ + u64 tfsr_el1; + + if (!system_supports_mte()) + return; + + tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); + + if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) { + /* + * Note: isb() is not required after this direct write + * because there is no indirect read subsequent to it + * (per ARM DDI 0487F.c table D13-1). + */ + write_sysreg_s(0, SYS_TFSR_EL1); + + kasan_report_async(); + } +} +#endif + static void update_sctlr_el1_tcf0(u64 tcf0) { /* ISB required for the kernel uaccess routines */ @@ -227,6 +250,19 @@ void mte_thread_switch(struct task_struct *next) /* avoid expensive SCTLR_EL1 accesses if no change */ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + else + isb(); + + /* + * Check if an async tag exception occurred at EL1. + * + * Note: On the context switch path we rely on the dsb() present + * in __switch_to() to guarantee that the indirect writes to TFSR_EL1 + * are synchronized before this point. + * isb() above is required for the same reason. + * + */ + mte_check_tfsr_el1(); } void mte_suspend_exit(void) -- cgit v1.2.3 From eab0e6e17d876381b4d47996eef3b5fd46ea4011 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:18 +0000 Subject: arm64: mte: Report async tag faults before suspend When MTE async mode is enabled TFSR_EL1 contains the accumulative asynchronous tag check faults for EL1 and EL0. During the suspend/resume operations the firmware might perform some operations that could change the state of the register resulting in a spurious tag check fault report. Report asynchronous tag faults before suspend and clear the TFSR_EL1 register after resume to prevent this to happen. 
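In terms of ordering, the suspend path ends up looking roughly like this (an illustrative sketch; the actual hook placement is in cpu_suspend() below):

	/* before handing control to firmware */
	mte_suspend_enter();	/* dsb/isb, then report any pending TFSR_EL1 fault */
	/* ... firmware suspend/resume ... */
	mte_suspend_exit();	/* existing resume hook */
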
Cc: Will Deacon Cc: Lorenzo Pieralisi Reviewed-by: Catalin Marinas Reviewed-by: Lorenzo Pieralisi Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-9-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 4 ++++ arch/arm64/kernel/mte.c | 16 ++++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 23 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9a929620ca5d..a38abc15186c 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -41,6 +41,7 @@ void mte_sync_tags(pte_t *ptep, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void flush_mte_state(void); void mte_thread_switch(struct task_struct *next); +void mte_suspend_enter(void); void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); @@ -66,6 +67,9 @@ static inline void flush_mte_state(void) static inline void mte_thread_switch(struct task_struct *next) { } +static inline void mte_suspend_enter(void) +{ +} static inline void mte_suspend_exit(void) { } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b6336fbe4c14..820bad94870e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -265,6 +265,22 @@ void mte_thread_switch(struct task_struct *next) mte_check_tfsr_el1(); } +void mte_suspend_enter(void) +{ + if (!system_supports_mte()) + return; + + /* + * The barriers are required to guarantee that the indirect writes + * to TFSR_EL1 are synchronized before we report the state. + */ + dsb(nsh); + isb(); + + /* Report SYS_TFSR_EL1 before suspend entry */ + mte_check_tfsr_el1(); +} + void mte_suspend_exit(void) { if (!system_supports_mte()) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index d7564891ffe1..6fdc8292b4f5 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -91,6 +91,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) unsigned long flags; struct sleep_stack_data state; + /* Report any MTE async fault before going to suspend */ + mte_suspend_enter(); + /* * From this point debug exceptions are disabled to prevent * updates to mdscr register (saved and restored along with -- cgit v1.2.3 From 13150149aa6ded1e6bbe0025beac6e12604dd87c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Mar 2021 10:01:12 +0100 Subject: arm64: fpsimd: run kernel mode NEON with softirqs disabled Kernel mode NEON can be used in task or softirq context, but only in a non-nesting manner, i.e., softirq context is only permitted if the interrupt was not taken at a point where the kernel was using the NEON in task context. This means all users of kernel mode NEON have to be aware of this limitation, and either need to provide scalar fallbacks that may be much slower (up to 20x for AES instructions) and potentially less safe, or use an asynchronous interface that defers processing to a later time when the NEON is guaranteed to be available. Given that grabbing and releasing the NEON is cheap, we can relax this restriction, by increasing the granularity of kernel mode NEON code, and always disabling softirq processing while the NEON is being used in task context. 
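The usage model this enables looks roughly like the following (an illustrative sketch; process_block_with_neon() and CHUNK are placeholders, not existing API):

	do {
		unsigned int todo = min(len, CHUNK);

		kernel_neon_begin();		/* grabs the NEON, softirqs now off */
		process_block_with_neon(dst, src, todo);
		kernel_neon_end();		/* re-enables softirq processing */

		dst += todo;
		src += todo;
		len -= todo;
	} while (len);

Keeping each NEON section short bounds the softirq-off latency, which is what makes trading the nesting restriction for local_bh_disable() acceptable.
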
Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210302090118.30666-4-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/crypto/aes-modes.S | 2 +- arch/arm64/crypto/sha1-ce-core.S | 2 +- arch/arm64/crypto/sha2-ce-core.S | 2 +- arch/arm64/crypto/sha3-ce-core.S | 4 ++-- arch/arm64/crypto/sha512-ce-core.S | 2 +- arch/arm64/include/asm/assembler.h | 28 +++++++++++++++++++++------- arch/arm64/kernel/asm-offsets.c | 2 ++ arch/arm64/kernel/fpsimd.c | 4 ++-- 8 files changed, 31 insertions(+), 15 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index bbdb54702aa7..ab6c14ef9f4e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -700,7 +700,7 @@ AES_FUNC_START(aes_mac_update) cbz w5, .Lmacout encrypt_block v0, w2, x1, x7, w8 st1 {v0.16b}, [x4] /* return dg */ - cond_yield .Lmacout, x7 + cond_yield .Lmacout, x7, x8 b .Lmacloop4x .Lmac1x: add w3, w3, #4 diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 8c02bbc2684e..889ca0f8972b 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -121,7 +121,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) add dgav.4s, dgav.4s, dg0v.4s cbz w2, 2f - cond_yield 3f, x5 + cond_yield 3f, x5, x6 b 0b /* diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 6cdea7d56059..491179922f49 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -129,7 +129,7 @@ CPU_LE( rev32 v19.16b, v19.16b ) /* handled all input blocks? */ cbz w2, 2f - cond_yield 3f, x5 + cond_yield 3f, x5, x6 b 0b /* diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 6f5208414fe3..9c77313f5a60 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform) eor v0.16b, v0.16b, v31.16b cbnz w8, 3b - cond_yield 3f, x8 + cond_yield 4f, x8, x9 cbnz w2, 0b /* save state */ -3: st1 { v0.1d- v3.1d}, [x0], #32 +4: st1 { v0.1d- v3.1d}, [x0], #32 st1 { v4.1d- v7.1d}, [x0], #32 st1 { v8.1d-v11.1d}, [x0], #32 st1 {v12.1d-v15.1d}, [x0], #32 diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index d6e7f6c95fa6..b6a3a36e15f5 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -195,7 +195,7 @@ CPU_LE( rev64 v19.16b, v19.16b ) add v10.2d, v10.2d, v2.2d add v11.2d, v11.2d, v3.2d - cond_yield 3f, x4 + cond_yield 3f, x4, x5 /* handled all input blocks? */ cbnz w2, 0b diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7b076ccd1a54..6ac38f7cf824 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -701,19 +702,32 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Check whether preempt-disabled code should yield as soon as it - * is able. This is the case if re-enabling preemption a single - * time results in a preempt count of zero, and the TIF_NEED_RESCHED - * flag is set. (Note that the latter is stored negated in the - * top word of the thread_info::preempt_count field) + * Check whether preempt/bh-disabled asm code should yield as soon as + * it is able. 
This is the case if we are currently running in task + * context, and either a softirq is pending, or the TIF_NEED_RESCHED + * flag is set and re-enabling preemption a single time would result in + * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is + * stored negated in the top word of the thread_info::preempt_count + * field) */ - .macro cond_yield, lbl:req, tmp:req -#ifdef CONFIG_PREEMPTION + .macro cond_yield, lbl:req, tmp:req, tmp2:req get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] + /* + * If we are serving a softirq, there is no point in yielding: the + * softirq will not be preempted no matter what we do, so we should + * run to completion as quickly as we can. + */ + tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ +#ifdef CONFIG_PREEMPTION sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif + adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING + this_cpu_offset \tmp2 + ldr w\tmp, [\tmp, \tmp2] + cbnz w\tmp, \lbl // yield on pending softirq in task context +.Lnoyield_\@: .endm /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a36e2fc330d4..cc7267a24bf7 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -95,6 +95,8 @@ int main(void) DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); + DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); + DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 062b21f30f94..823e3a8a8871 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void) */ static void get_cpu_fpsimd_context(void) { - preempt_disable(); + local_bh_disable(); __get_cpu_fpsimd_context(); } @@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void) static void put_cpu_fpsimd_context(void) { __put_cpu_fpsimd_context(); - preempt_enable(); + local_bh_enable(); } static bool have_cpu_fpsimd_context(void) -- cgit v1.2.3 From ef9c5d09797db874a29a97407c3ea3990210432b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 12 Apr 2021 18:23:20 +0100 Subject: arm64/sve: Remove redundant system_supports_sve() tests Currently there are a number of places in the SVE code where we check both system_supports_sve() and TIF_SVE. This is a bit redundant given that we should never get into a situation where we have set TIF_SVE without having SVE support and it is not clear that silently ignoring a mistakenly set TIF_SVE flag is the most sensible error handling approach. For now let's just drop the system_supports_sve() checks since this will at least reduce overhead a little. 
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210412172320.3315-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0f58e45bd3d1..5bbb0547ba21 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -285,7 +285,7 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); @@ -307,7 +307,7 @@ static void fpsimd_save(void) WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { + if (test_thread_flag(TIF_SVE)) { if (WARN_ON(sve_get_vl() != last->sve_vl)) { /* * Can't save the user regs, so current would @@ -1100,7 +1100,7 @@ void fpsimd_preserve_current_state(void) void fpsimd_signal_preserve_current_state(void) { fpsimd_preserve_current_state(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (test_thread_flag(TIF_SVE)) sve_to_fpsimd(current); } @@ -1189,7 +1189,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; - if (system_supports_sve() && test_thread_flag(TIF_SVE)) + if (test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); task_fpsimd_load(); -- cgit v1.2.3 From 2f79d2fc391e4ba64df908b8c07dda6c3a907056 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 18 Mar 2021 20:10:52 -0700 Subject: arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere In an upcoming change we are going to introduce per-task SCTLR_EL1 bits for PAC. Move the existing per-task SCTLR_EL1 field out of the MTE-specific code so that we will be able to use it from both the PAC and MTE code paths and make the task switching code more efficient. 
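With the field generalised, a prospective user outside the MTE code would update per-task bits along these lines (a sketch; SCTLR_ELx_SOMEBIT stands in for any bit covered by SCTLR_USER_MASK):

	u64 sctlr = current->thread.sctlr_user;

	sctlr &= ~SCTLR_ELx_SOMEBIT;
	sctlr |= SCTLR_ELx_SOMEOTHERBIT;
	set_task_sctlr_el1(sctlr);	/* updates the field and the live register */

which is the shape the PAC enable/disable support in the following patches takes.
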
Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/Ic65fac78a7926168fa68f9e8da591c9e04ff7278 Link: https://lore.kernel.org/r/13d725cb8e741950fb9d6e64b2cd9bd54ff7c3f9.1616123271.git.pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 6 ++++- arch/arm64/kernel/mte.c | 49 ++++++++------------------------------ arch/arm64/kernel/process.c | 24 +++++++++++++++++++ 3 files changed, 39 insertions(+), 40 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d3286..80895bb30490 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -151,11 +151,13 @@ struct thread_struct { struct ptrauth_keys_kernel keys_kernel; #endif #ifdef CONFIG_ARM64_MTE - u64 sctlr_tcf0; u64 gcr_user_excl; #endif + u64 sctlr_user; }; +#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK + static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { @@ -247,6 +249,8 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); +void set_task_sctlr_el1(u64 sctlr); + /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 820bad94870e..8da597e41795 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -185,26 +185,6 @@ void mte_check_tfsr_el1(void) } #endif -static void update_sctlr_el1_tcf0(u64 tcf0) -{ - /* ISB required for the kernel uaccess routines */ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); - isb(); -} - -static void set_sctlr_el1_tcf0(u64 tcf0) -{ - /* - * mte_thread_switch() checks current->thread.sctlr_tcf0 as an - * optimisation. Disable preemption so that it does not see - * the variable update before the SCTLR_EL1.TCF0 one. - */ - preempt_disable(); - current->thread.sctlr_tcf0 = tcf0; - update_sctlr_el1_tcf0(tcf0); - preempt_enable(); -} - static void update_gcr_el1_excl(u64 excl) { @@ -237,31 +217,22 @@ void flush_mte_state(void) write_sysreg_s(0, SYS_TFSRE0_EL1); clear_thread_flag(TIF_MTE_ASYNC_FAULT); /* disable tag checking */ - set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); + set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) | + SCTLR_EL1_TCF0_NONE); /* reset tag generation mask */ set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK); } void mte_thread_switch(struct task_struct *next) { - if (!system_supports_mte()) - return; - - /* avoid expensive SCTLR_EL1 accesses if no change */ - if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) - update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); - else - isb(); - /* * Check if an async tag exception occurred at EL1. * * Note: On the context switch path we rely on the dsb() present * in __switch_to() to guarantee that the indirect writes to TFSR_EL1 * are synchronized before this point. - * isb() above is required for the same reason. 
- * */ + isb(); mte_check_tfsr_el1(); } @@ -291,7 +262,7 @@ void mte_suspend_exit(void) long set_mte_ctrl(struct task_struct *task, unsigned long arg) { - u64 tcf0; + u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK; u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & SYS_GCR_EL1_EXCL_MASK; @@ -300,23 +271,23 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) switch (arg & PR_MTE_TCF_MASK) { case PR_MTE_TCF_NONE: - tcf0 = SCTLR_EL1_TCF0_NONE; + sctlr |= SCTLR_EL1_TCF0_NONE; break; case PR_MTE_TCF_SYNC: - tcf0 = SCTLR_EL1_TCF0_SYNC; + sctlr |= SCTLR_EL1_TCF0_SYNC; break; case PR_MTE_TCF_ASYNC: - tcf0 = SCTLR_EL1_TCF0_ASYNC; + sctlr |= SCTLR_EL1_TCF0_ASYNC; break; default: return -EINVAL; } if (task != current) { - task->thread.sctlr_tcf0 = tcf0; + task->thread.sctlr_user = sctlr; task->thread.gcr_user_excl = gcr_excl; } else { - set_sctlr_el1_tcf0(tcf0); + set_task_sctlr_el1(sctlr); set_gcr_el1_excl(gcr_excl); } @@ -333,7 +304,7 @@ long get_mte_ctrl(struct task_struct *task) ret = incl << PR_MTE_TAG_SHIFT; - switch (task->thread.sctlr_tcf0) { + switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) { case SCTLR_EL1_TCF0_NONE: ret |= PR_MTE_TCF_NONE; break; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..de0ab08cb260 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -529,6 +529,27 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } +static void update_sctlr_el1(u64 sctlr) +{ + sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr); + + /* ISB required for the kernel uaccess routines when setting TCF0. */ + isb(); +} + +void set_task_sctlr_el1(u64 sctlr) +{ + /* + * __switch_to() checks current->thread.sctlr as an + * optimisation. Disable preemption so that it does not see + * the variable update before the SCTLR_EL1 one. + */ + preempt_disable(); + current->thread.sctlr_user = sctlr; + update_sctlr_el1(sctlr); + preempt_enable(); +} + /* * Thread switching. */ @@ -559,6 +580,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, * registers. */ mte_thread_switch(next); + /* avoid expensive SCTLR_EL1 accesses if no change */ + if (prev->thread.sctlr_user != next->thread.sctlr_user) + update_sctlr_el1(next->thread.sctlr_user); /* the actual thread switch */ last = cpu_switch_to(prev, next); -- cgit v1.2.3 From 201698626fbca1cf1a3b686ba14cf2a056500716 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 18 Mar 2021 20:10:53 -0700 Subject: arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS) This change introduces a prctl that allows the user program to control which PAC keys are enabled in a particular task. The main reason why this is useful is to enable a userspace ABI that uses PAC to sign and authenticate function pointers and other pointers exposed outside of the function, while still allowing binaries conforming to the ABI to interoperate with legacy binaries that do not sign or authenticate pointers. The idea is that a dynamic loader or early startup code would issue this prctl very early after establishing that a process may load legacy binaries, but before executing any PAC instructions. This change adds a small amount of overhead to kernel entry and exit due to additional required instruction sequences. 
On a DragonBoard 845c (Cortex-A75) with the powersave governor, the overhead of similar instruction sequences was measured as 4.9ns when simulating the common case where IA is left enabled, or 43.7ns when simulating the uncommon case where IA is disabled. These numbers can be seen as the worst case scenario, since in more realistic scenarios a better performing governor would be used and a newer chip would be used that would support PAC unlike Cortex-A75 and would be expected to be faster than Cortex-A75. On an Apple M1 under a hypervisor, the overhead of the entry/exit instruction sequences introduced by this patch was measured as 0.3ns in the case where IA is left enabled, and 33.0ns in the case where IA is disabled. Signed-off-by: Peter Collingbourne Reviewed-by: Dave Martin Link: https://linux-review.googlesource.com/id/Ibc41a5e6a76b275efbaa126b31119dc197b927a5 Link: https://lore.kernel.org/r/d6609065f8f40397a4124654eb68c9f490b4d477.1616123271.git.pcc@google.com Signed-off-by: Catalin Marinas --- Documentation/arm64/pointer-authentication.rst | 34 ++++++++++++++ arch/arm64/include/asm/mte.h | 4 +- arch/arm64/include/asm/pointer_auth.h | 25 +++++++++-- arch/arm64/include/asm/processor.h | 9 +++- arch/arm64/include/asm/sysreg.h | 4 +- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 39 +++++++++++++++- arch/arm64/kernel/mte.c | 2 +- arch/arm64/kernel/pointer_auth.c | 62 ++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 10 +++-- arch/arm64/kernel/ptrace.c | 41 +++++++++++++++++ include/uapi/linux/elf.h | 1 + include/uapi/linux/prctl.h | 4 ++ kernel/sys.c | 16 +++++++ 14 files changed, 239 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst index 30b2ab06526b..f127666ea3a8 100644 --- a/Documentation/arm64/pointer-authentication.rst +++ b/Documentation/arm64/pointer-authentication.rst @@ -107,3 +107,37 @@ filter out the Pointer Authentication system key registers from KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID register. Any attempt to use the Pointer Authentication instructions will result in an UNDEFINED exception being injected into the guest. + + +Enabling and disabling keys +--------------------------- + +The prctl PR_PAC_SET_ENABLED_KEYS allows the user program to control which +PAC keys are enabled in a particular task. It takes two arguments, the +first being a bitmask of PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY +and PR_PAC_APDBKEY specifying which keys shall be affected by this prctl, +and the second being a bitmask of the same bits specifying whether the key +should be enabled or disabled. For example:: + + prctl(PR_PAC_SET_ENABLED_KEYS, + PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY, + PR_PAC_APIBKEY, 0, 0); + +disables all keys except the IB key. + +The main reason why this is useful is to enable a userspace ABI that uses PAC +instructions to sign and authenticate function pointers and other pointers +exposed outside of the function, while still allowing binaries conforming to +the ABI to interoperate with legacy binaries that do not sign or authenticate +pointers. + +The idea is that a dynamic loader or early startup code would issue this +prctl very early after establishing that a process may load legacy binaries, +but before executing any PAC instructions. 
+ +For compatibility with previous kernel versions, processes start up with IA, +IB, DA and DB enabled, and are reset to this state on exec(). Processes created +via fork() and clone() inherit the key enabled state from the calling process. + +It is recommended to avoid disabling the IA key, as this has higher performance +overhead than disabling any of the other keys. diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index a38abc15186c..5d6a53cca5d5 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -39,7 +39,7 @@ void mte_free_tag_storage(char *storage); void mte_sync_tags(pte_t *ptep, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); -void flush_mte_state(void); +void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); void mte_suspend_enter(void); void mte_suspend_exit(void); @@ -61,7 +61,7 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte) static inline void mte_copy_page_tags(void *kto, const void *kfrom) { } -static inline void flush_mte_state(void) +static inline void mte_thread_init_user(void) { } static inline void mte_thread_switch(struct task_struct *next) diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index b112a11e9302..cefe7e9b6fa9 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -3,6 +3,7 @@ #define __ASM_POINTER_AUTH_H #include +#include #include #include @@ -71,6 +72,10 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); +extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, + unsigned long enabled); +extern int ptrauth_get_enabled_keys(struct task_struct *tsk); + static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) { return ptrauth_clear_pac(ptr); @@ -85,8 +90,17 @@ static __always_inline void ptrauth_enable(void) isb(); } -#define ptrauth_thread_init_user(tsk) \ - ptrauth_keys_init_user(&(tsk)->thread.keys_user) +#define ptrauth_thread_init_user() \ + do { \ + ptrauth_keys_init_user(¤t->thread.keys_user); \ + \ + /* enable all keys */ \ + if (system_supports_address_auth()) \ + set_task_sctlr_el1(current->thread.sctlr_user | \ + SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB); \ + } while (0) + #define ptrauth_thread_init_kernel(tsk) \ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel) #define ptrauth_thread_switch_kernel(tsk) \ @@ -95,10 +109,15 @@ static __always_inline void ptrauth_enable(void) #else /* CONFIG_ARM64_PTR_AUTH */ #define ptrauth_enable() #define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) +#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL) +#define ptrauth_get_enabled_keys(tsk) (-EINVAL) #define ptrauth_strip_insn_pac(lr) (lr) -#define ptrauth_thread_init_user(tsk) +#define ptrauth_thread_init_user() #define ptrauth_thread_init_kernel(tsk) #define ptrauth_thread_switch_kernel(tsk) #endif /* CONFIG_ARM64_PTR_AUTH */ +#define PR_PAC_ENABLED_KEYS_MASK \ + (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY) + #endif /* __ASM_POINTER_AUTH_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 80895bb30490..2f21c76324bb 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -156,7 +156,9 @@ struct thread_struct { u64 sctlr_user; }; -#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK 
+#define SCTLR_USER_MASK \ + (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ + SCTLR_EL1_TCF0_MASK) static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) @@ -305,6 +307,11 @@ extern void __init minsigstksz_setup(void); /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) +/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */ +#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled) \ + ptrauth_set_enabled_keys(tsk, keys, enabled) +#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk) + #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..9dc2e88b10c6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -565,8 +565,10 @@ #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_ENIA_SHIFT 31 + #define SCTLR_ELx_ITFSB (BIT(37)) -#define SCTLR_ELx_ENIA (BIT(31)) +#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) #define SCTLR_ELx_ENIB (BIT(30)) #define SCTLR_ELx_ENDA (BIT(27)) #define SCTLR_ELx_EE (BIT(25)) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a36e2fc330d4..8b4fbad427b3 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -43,6 +43,7 @@ int main(void) #endif BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); + DEFINE(THREAD_SCTLR_USER, offsetof(struct task_struct, thread.sctlr_user)); #ifdef CONFIG_ARM64_PTR_AUTH DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user)); DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..36ae1cdfaeb6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -247,7 +247,24 @@ alternative_else_nop_endif check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 - ptrauth_keys_install_kernel tsk, x20, x22, x23 + ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23 + +#ifdef CONFIG_ARM64_PTR_AUTH +alternative_if ARM64_HAS_ADDRESS_AUTH + /* + * Enable IA for in-kernel PAC if the task had it disabled. Although + * this could be implemented with an unconditional MRS which would avoid + * a load, this was measured to be slower on Cortex-A75 and Cortex-A76. + */ + ldr x0, [tsk, THREAD_SCTLR_USER] + tbnz x0, SCTLR_ELx_ENIA_SHIFT, 1f + mrs x0, sctlr_el1 + orr x0, x0, SCTLR_ELx_ENIA + msr sctlr_el1, x0 +1: + isb +alternative_else_nop_endif +#endif mte_set_kernel_gcr x22, x23 @@ -351,9 +368,27 @@ alternative_else_nop_endif 3: scs_save tsk, x0 - /* No kernel C function calls after this as user keys are set. */ + /* + * No kernel C function calls after this as user keys are set and IA may + * be disabled. + */ ptrauth_keys_install_user tsk, x0, x1, x2 +#ifdef CONFIG_ARM64_PTR_AUTH +alternative_if ARM64_HAS_ADDRESS_AUTH + /* + * IA was enabled for in-kernel PAC. Disable it now if needed. + * All other per-task SCTLR bits were updated on task switch. 
+ */ + ldr x0, [tsk, THREAD_SCTLR_USER] + tbnz x0, SCTLR_ELx_ENIA_SHIFT, 1f + mrs x0, sctlr_el1 + bic x0, x0, SCTLR_ELx_ENIA + msr sctlr_el1, x0 +1: +alternative_else_nop_endif +#endif + mte_set_user_gcr tsk, x0, x1 apply_ssbd 0, x0, x1 diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 8da597e41795..125a10e413e9 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -207,7 +207,7 @@ static void set_gcr_el1_excl(u64 excl) */ } -void flush_mte_state(void) +void mte_thread_init_user(void) { if (!system_supports_mte()) return; diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c index adb955fd9bdd..f03e5bfe4490 100644 --- a/arch/arm64/kernel/pointer_auth.c +++ b/arch/arm64/kernel/pointer_auth.c @@ -46,3 +46,65 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg) return 0; } + +static u64 arg_to_enxx_mask(unsigned long arg) +{ + u64 sctlr_enxx_mask = 0; + + WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK); + if (arg & PR_PAC_APIAKEY) + sctlr_enxx_mask |= SCTLR_ELx_ENIA; + if (arg & PR_PAC_APIBKEY) + sctlr_enxx_mask |= SCTLR_ELx_ENIB; + if (arg & PR_PAC_APDAKEY) + sctlr_enxx_mask |= SCTLR_ELx_ENDA; + if (arg & PR_PAC_APDBKEY) + sctlr_enxx_mask |= SCTLR_ELx_ENDB; + return sctlr_enxx_mask; +} + +int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, + unsigned long enabled) +{ + u64 sctlr = tsk->thread.sctlr_user; + + if (!system_supports_address_auth()) + return -EINVAL; + + if (is_compat_thread(task_thread_info(tsk))) + return -EINVAL; + + if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys)) + return -EINVAL; + + sctlr &= ~arg_to_enxx_mask(keys); + sctlr |= arg_to_enxx_mask(enabled); + if (tsk == current) + set_task_sctlr_el1(sctlr); + else + tsk->thread.sctlr_user = sctlr; + + return 0; +} + +int ptrauth_get_enabled_keys(struct task_struct *tsk) +{ + int retval = 0; + + if (!system_supports_address_auth()) + return -EINVAL; + + if (is_compat_thread(task_thread_info(tsk))) + return -EINVAL; + + if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA) + retval |= PR_PAC_APIAKEY; + if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB) + retval |= PR_PAC_APIBKEY; + if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA) + retval |= PR_PAC_APDAKEY; + if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB) + retval |= PR_PAC_APDBKEY; + + return retval; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index de0ab08cb260..d583cbae32f6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -339,7 +339,6 @@ void flush_thread(void) tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); - flush_mte_state(); } void release_thread(struct task_struct *dead_task) @@ -531,7 +530,11 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, static void update_sctlr_el1(u64 sctlr) { - sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr); + /* + * EnIA must not be cleared while in the kernel as this is necessary for + * in-kernel PAC. It will be cleared on kernel exit if needed. + */ + sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr); /* ISB required for the kernel uaccess routines when setting TCF0. */ isb(); @@ -632,7 +635,8 @@ void arch_setup_new_exec(void) { current->mm->context.flags = is_compat_task() ? 
MMCF_AARCH32 : 0; - ptrauth_thread_init_user(current); + ptrauth_thread_init_user(); + mte_thread_init_user(); if (task_spec_ssb_noexec(current)) { arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 170f42fd6101..eb2f73939b7b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -909,6 +909,38 @@ static int pac_mask_get(struct task_struct *target, return membuf_write(&to, &uregs, sizeof(uregs)); } +static int pac_enabled_keys_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long enabled_keys = ptrauth_get_enabled_keys(target); + + if (IS_ERR_VALUE(enabled_keys)) + return enabled_keys; + + return membuf_write(&to, &enabled_keys, sizeof(enabled_keys)); +} + +static int pac_enabled_keys_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long enabled_keys = ptrauth_get_enabled_keys(target); + + if (IS_ERR_VALUE(enabled_keys)) + return enabled_keys; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0, + sizeof(long)); + if (ret) + return ret; + + return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK, + enabled_keys); +} + #ifdef CONFIG_CHECKPOINT_RESTORE static __uint128_t pac_key_to_user(const struct ptrauth_key *key) { @@ -1074,6 +1106,7 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, + REGSET_PAC_ENABLED_KEYS, #ifdef CONFIG_CHECKPOINT_RESTORE REGSET_PACA_KEYS, REGSET_PACG_KEYS, @@ -1160,6 +1193,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = pac_mask_get, /* this cannot be set dynamically */ }, + [REGSET_PAC_ENABLED_KEYS] = { + .core_note_type = NT_ARM_PAC_ENABLED_KEYS, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = pac_enabled_keys_get, + .set = pac_enabled_keys_set, + }, #ifdef CONFIG_CHECKPOINT_RESTORE [REGSET_PACA_KEYS] = { .core_note_type = NT_ARM_PACA_KEYS, diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 30f68b42eeb5..61bf4774b8f2 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -426,6 +426,7 @@ typedef struct elf64_shdr { #define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ +#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 667f1aed091c..18a9f59dc067 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -255,4 +255,8 @@ struct prctl_mm_map { # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 2e2e3f378d97..3d62c9599dc0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -119,6 +119,12 @@ #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif +#ifndef PAC_SET_ENABLED_KEYS +# define PAC_SET_ENABLED_KEYS(a, b, c) (-EINVAL) +#endif +#ifndef 
PAC_GET_ENABLED_KEYS +# define PAC_GET_ENABLED_KEYS(a) (-EINVAL) +#endif #ifndef SET_TAGGED_ADDR_CTRL # define SET_TAGGED_ADDR_CTRL(a) (-EINVAL) #endif @@ -2497,6 +2503,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = PAC_RESET_KEYS(me, arg2); break; + case PR_PAC_SET_ENABLED_KEYS: + if (arg4 || arg5) + return -EINVAL; + error = PAC_SET_ENABLED_KEYS(me, arg2, arg3); + break; + case PR_PAC_GET_ENABLED_KEYS: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = PAC_GET_ENABLED_KEYS(me); + break; case PR_SET_TAGGED_ADDR_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; -- cgit v1.2.3 From b90e483938ce387c256e03fb144f82f64551847b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 18 Mar 2021 20:10:54 -0700 Subject: arm64: pac: Optimize kernel entry/exit key installation code paths The kernel does not use any keys besides IA so we don't need to install IB/DA/DB/GA on kernel exit if we arrange to install them on task switch instead, which we can expect to happen an order of magnitude less often. Furthermore we can avoid installing the user IA in the case where the user task has IA disabled and just leave the kernel IA installed. This also lets us avoid needing to install IA on kernel entry. On an Apple M1 under a hypervisor, the overhead of kernel entry/exit has been measured to be reduced by 15.6ns in the case where IA is enabled, and 31.9ns in the case where IA is disabled. Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/Ieddf6b580d23c9e0bed45a822dabe72d2ffc9a8e Link: https://lore.kernel.org/r/2d653d055f38f779937f2b92f8ddd5cf9e4af4f4.1616123271.git.pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/asm_pointer_auth.h | 20 +---------------- arch/arm64/include/asm/pointer_auth.h | 36 +++++++++++++++++++++++++------ arch/arm64/kernel/asm-offsets.c | 4 ---- arch/arm64/kernel/entry.S | 33 ++++++++++++++++------------ arch/arm64/kernel/pointer_auth.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/suspend.c | 3 ++- 7 files changed, 53 insertions(+), 45 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index 52dead2a8640..8ca2dc0661ee 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -13,30 +13,12 @@ * so use the base value of ldp as thread.keys_user and offset as * thread.keys_user.ap*. 
*/ - .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 + .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 mov \tmp1, #THREAD_KEYS_USER add \tmp1, \tsk, \tmp1 -alternative_if_not ARM64_HAS_ADDRESS_AUTH - b .Laddr_auth_skip_\@ -alternative_else_nop_endif ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA] msr_s SYS_APIAKEYLO_EL1, \tmp2 msr_s SYS_APIAKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIB] - msr_s SYS_APIBKEYLO_EL1, \tmp2 - msr_s SYS_APIBKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDA] - msr_s SYS_APDAKEYLO_EL1, \tmp2 - msr_s SYS_APDAKEYHI_EL1, \tmp3 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDB] - msr_s SYS_APDBKEYLO_EL1, \tmp2 - msr_s SYS_APDBKEYHI_EL1, \tmp3 -.Laddr_auth_skip_\@: -alternative_if ARM64_HAS_GENERIC_AUTH - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APGA] - msr_s SYS_APGAKEYLO_EL1, \tmp2 - msr_s SYS_APGAKEYHI_EL1, \tmp3 -alternative_else_nop_endif .endm .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index cefe7e9b6fa9..d50416be99be 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -35,6 +35,25 @@ struct ptrauth_keys_kernel { struct ptrauth_key apia; }; +#define __ptrauth_key_install_nosync(k, v) \ +do { \ + struct ptrauth_key __pki_v = (v); \ + write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ + write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ +} while (0) + +static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys) +{ + if (system_supports_address_auth()) { + __ptrauth_key_install_nosync(APIB, keys->apib); + __ptrauth_key_install_nosync(APDA, keys->apda); + __ptrauth_key_install_nosync(APDB, keys->apdb); + } + + if (system_supports_generic_auth()) + __ptrauth_key_install_nosync(APGA, keys->apga); +} + static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys) { if (system_supports_address_auth()) { @@ -46,14 +65,9 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys) if (system_supports_generic_auth()) get_random_bytes(&keys->apga, sizeof(keys->apga)); -} -#define __ptrauth_key_install_nosync(k, v) \ -do { \ - struct ptrauth_key __pki_v = (v); \ - write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ - write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ -} while (0) + ptrauth_keys_install_user(keys); +} static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys) { @@ -90,6 +104,9 @@ static __always_inline void ptrauth_enable(void) isb(); } +#define ptrauth_suspend_exit() \ + ptrauth_keys_install_user(¤t->thread.keys_user) + #define ptrauth_thread_init_user() \ do { \ ptrauth_keys_init_user(¤t->thread.keys_user); \ @@ -101,6 +118,9 @@ static __always_inline void ptrauth_enable(void) SCTLR_ELx_ENDA | SCTLR_ELx_ENDB); \ } while (0) +#define ptrauth_thread_switch_user(tsk) \ + ptrauth_keys_install_user(&(tsk)->thread.keys_user) + #define ptrauth_thread_init_kernel(tsk) \ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel) #define ptrauth_thread_switch_kernel(tsk) \ @@ -112,8 +132,10 @@ static __always_inline void ptrauth_enable(void) #define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL) #define ptrauth_get_enabled_keys(tsk) (-EINVAL) #define ptrauth_strip_insn_pac(lr) (lr) +#define ptrauth_suspend_exit() #define ptrauth_thread_init_user() #define ptrauth_thread_init_kernel(tsk) +#define ptrauth_thread_switch_user(tsk) #define 
ptrauth_thread_switch_kernel(tsk) #endif /* CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 8b4fbad427b3..a02573c32f79 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -148,10 +148,6 @@ int main(void) #endif #ifdef CONFIG_ARM64_PTR_AUTH DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia)); - DEFINE(PTRAUTH_USER_KEY_APIB, offsetof(struct ptrauth_keys_user, apib)); - DEFINE(PTRAUTH_USER_KEY_APDA, offsetof(struct ptrauth_keys_user, apda)); - DEFINE(PTRAUTH_USER_KEY_APDB, offsetof(struct ptrauth_keys_user, apdb)); - DEFINE(PTRAUTH_USER_KEY_APGA, offsetof(struct ptrauth_keys_user, apga)); DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia)); BLANK(); #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 36ae1cdfaeb6..2e46c75a5b48 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -247,21 +247,26 @@ alternative_else_nop_endif check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 - ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23 - #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH /* * Enable IA for in-kernel PAC if the task had it disabled. Although * this could be implemented with an unconditional MRS which would avoid * a load, this was measured to be slower on Cortex-A75 and Cortex-A76. + * + * Install the kernel IA key only if IA was enabled in the task. If IA + * was disabled on kernel exit then we would have left the kernel IA + * installed so there is no need to install it again. */ ldr x0, [tsk, THREAD_SCTLR_USER] - tbnz x0, SCTLR_ELx_ENIA_SHIFT, 1f + tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f + __ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23 + b 2f +1: mrs x0, sctlr_el1 orr x0, x0, SCTLR_ELx_ENIA msr sctlr_el1, x0 -1: +2: isb alternative_else_nop_endif #endif @@ -368,24 +373,24 @@ alternative_else_nop_endif 3: scs_save tsk, x0 - /* - * No kernel C function calls after this as user keys are set and IA may - * be disabled. - */ - ptrauth_keys_install_user tsk, x0, x1, x2 - #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH /* - * IA was enabled for in-kernel PAC. Disable it now if needed. - * All other per-task SCTLR bits were updated on task switch. + * IA was enabled for in-kernel PAC. Disable it now if needed, or + * alternatively install the user's IA. All other per-task keys and + * SCTLR bits were updated on task switch. + * + * No kernel C function calls after this. 
 */
 	ldr	x0, [tsk, THREAD_SCTLR_USER]
-	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	__ptrauth_keys_install_user tsk, x0, x1, x2
+	b	2f
+1:
 	mrs	x0, sctlr_el1
 	bic	x0, x0, SCTLR_ELx_ENIA
 	msr	sctlr_el1, x0
-1:
+2:
 alternative_else_nop_endif
 #endif
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c index f03e5bfe4490..60901ab0a7fe 100644 --- a/arch/arm64/kernel/pointer_auth.c +++ b/arch/arm64/kernel/pointer_auth.c @@ -43,6 +43,7 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg) get_random_bytes(&keys->apdb, sizeof(keys->apdb)); if (arg & PR_PAC_APGAKEY) get_random_bytes(&keys->apga, sizeof(keys->apga)); + ptrauth_keys_install_user(keys); return 0; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index d583cbae32f6..449b94584f9a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -568,6 +568,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); + ptrauth_thread_switch_user(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 6fdc8292b4f5..e3f72df9509d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -74,8 +74,9 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); - /* Restore additional MTE-specific configuration */ + /* Restore additional feature-specific configuration */ mte_suspend_exit(); + ptrauth_suspend_exit(); } /* -- cgit v1.2.3 From 839157876f97fcc7ead0b62c9377bb50f75a3df9 Mon Sep 17 00:00:00 2001 From: zhouchuangao Date: Tue, 30 Mar 2021 04:57:50 -0700 Subject: arm64/kernel/probes: Use BUG_ON instead of if condition followed by BUG. It can be optimized at compile time. Signed-off-by: zhouchuangao Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/1617105472-6081-1-git-send-email-zhouchuangao@vivo.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 66aac2881ba8..ecf0f613327f 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -264,8 +264,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) * normal page fault. */ instruction_pointer_set(regs, (unsigned long) cur->addr); - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); -- cgit v1.2.3 From 087dfa5ca7d89c3cf6f4e972e279406a5dee5f67 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 15 Apr 2021 13:17:42 +0100 Subject: arm64/sve: Add compile time checks for SVE hooks in generic functions The FPSIMD code was relying on IS_ENABLED() checks in system_supports_sve() to cause the compiler to delete references to SVE functions in some places; add explicit IS_ENABLED() checks back. 
Fixes: ef9c5d09797d ("arm64/sve: Remove redundant system_supports_sve() tests") Reported-by: kernel test robot Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210415121742.36628-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5bbb0547ba21..da8cc14746e9 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -285,7 +285,7 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); @@ -307,7 +307,8 @@ static void fpsimd_save(void) WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (test_thread_flag(TIF_SVE)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && + test_thread_flag(TIF_SVE)) { if (WARN_ON(sve_get_vl() != last->sve_vl)) { /* * Can't save the user regs, so current would -- cgit v1.2.3
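
A note on the IS_ENABLED() idiom relied on in the fix above: because the macro expands to a compile-time constant, a disabled config option turns the guarded branch into dead code, so (with optimisations enabled) the compiler emits no reference to the SVE-only helpers and they need not exist in the final image. Below is a minimal, self-contained sketch of the pattern; it is not kernel code, the option and helper names are invented for illustration, and the IS_ENABLED() stand-in is simplified relative to the real <linux/kconfig.h> machinery.

#define CONFIG_FEATURE_X 0              /* pretend the option is disabled */
#define IS_ENABLED(option) (option)     /* simplified stand-in for <linux/kconfig.h> */

/*
 * Only referenced when CONFIG_FEATURE_X is enabled. With the option off and
 * optimisations on, the condition below folds to zero, the call is never
 * emitted, and this symbol does not have to be defined at link time.
 */
extern void feature_x_hook(void);

void generic_path(int flag)
{
	if (IS_ENABLED(CONFIG_FEATURE_X) && flag)
		feature_x_hook();       /* dead code when the option is off */
}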
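
For context on the interface added earlier in this series: a process can restrict its own pointer authentication keys with prctl(PR_PAC_SET_ENABLED_KEYS) and query them with prctl(PR_PAC_GET_ENABLED_KEYS). The userspace sketch below is illustrative only and not part of any patch above; it assumes UAPI headers that already define the PR_PAC_* constants (values 60 and 61 as added in prctl.h above) and a kernel with CONFIG_ARM64_PTR_AUTH on hardware with address authentication, otherwise the calls fail with EINVAL. As the ptrauth_set_enabled_keys() hunk shows, only the four address keys may be named and the enabled set must be a subset of the keys argument.

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	long enabled;

	/*
	 * Affect all four address keys, but leave only IA enabled for this
	 * task; the kernel clears the corresponding SCTLR_EL1 EN* bits for
	 * userspace on the next return to EL0.
	 */
	if (prctl(PR_PAC_SET_ENABLED_KEYS,
		  PR_PAC_APIAKEY | PR_PAC_APIBKEY |
		  PR_PAC_APDAKEY | PR_PAC_APDBKEY,	/* keys to affect */
		  PR_PAC_APIAKEY,			/* keys to enable */
		  0, 0)) {
		perror("PR_PAC_SET_ENABLED_KEYS");
		return 1;
	}

	/* Read back the mask of currently enabled keys. */
	enabled = prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0);
	if (enabled < 0) {
		perror("PR_PAC_GET_ENABLED_KEYS");
		return 1;
	}

	printf("enabled PAC keys: 0x%lx\n", (unsigned long)enabled);
	return 0;
}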