From 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:48 +0800 Subject: x86/traps: Use pt_regs directly in fixup_bad_iret() Always stash the address error_entry() is going to return to, in %r12 and get rid of the void *error_entry_ret; slot in struct bad_iret_stack which was supposed to account for it and pt_regs pushed on the stack. After this, both fixup_bad_iret() and sync_regs() can work on a struct pt_regs pointer directly. [ bp: Rewrite commit message, touch ups. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73d958522b6a..ecbfca3cc18c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1061,9 +1061,12 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - mov %rsp, %rdi + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rsp + ENCODE_FRAME_POINTER + pushq %r12 jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) -- cgit v1.2.3 From 520a7e80c96d655fbe4650d9cc985bd9d0443389 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:49 +0800 Subject: x86/entry: Switch the stack after error_entry() returns error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault from a bad IRET, to copy pt_regs to the kernel stack. It switches to the kernel stack directly after sync_regs(). But error_entry() itself is also a function call, so it has to stash the address it is going to return to, in %r12 which is unnecessarily complicated. Move the stack switching after error_entry() and get rid of the need to handle the return address. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ecbfca3cc18c..ca3e99e08a44 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -326,6 +326,8 @@ SYM_CODE_END(ret_from_fork) .macro idtentry_body cfunc has_error_code:req call error_entry + movq %rax, %rsp /* switch to the task stack if from userspace */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -1002,14 +1004,10 @@ SYM_CODE_START_LOCAL(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. 
*/ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ - ENCODE_FRAME_POINTER - pushq %r12 RET /* @@ -1041,6 +1039,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ RET .Lbstep_iret: @@ -1061,12 +1060,9 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret - mov %rax, %rsp - ENCODE_FRAME_POINTER - pushq %r12 + mov %rax, %rdi jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) -- cgit v1.2.3 From ee774dac0da1543376a69fd90840af6aa86879b3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:50 +0800 Subject: x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() The macro idtentry() (through idtentry_body()) calls error_entry() unconditionally even on XENPV. But XENPV needs to only push and clear regs. PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its original place when the function returns, which means it is not possible to convert it to a C function. Carve out PUSH_AND_CLEAR_REGS out of error_entry() and into a separate function and call it before error_entry() in order to avoid calling error_entry() on XENPV. It will also allow for error_entry() to be converted to C code that can use inlined sync_regs() and save a function call. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ca3e99e08a44..b1cef3b0a7ab 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -318,6 +318,14 @@ SYM_CODE_END(ret_from_fork) #endif .endm +/* Save all registers in pt_regs */ +SYM_CODE_START_LOCAL(push_and_clear_regs) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + RET +SYM_CODE_END(push_and_clear_regs) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -325,6 +333,9 @@ SYM_CODE_END(ret_from_fork) */ .macro idtentry_body cfunc has_error_code:req + call push_and_clear_regs + UNWIND_HINT_REGS + call error_entry movq %rax, %rsp /* switch to the task stack if from userspace */ ENCODE_FRAME_POINTER @@ -985,13 +996,11 @@ SYM_CODE_START_LOCAL(paranoid_exit) SYM_CODE_END(paranoid_exit) /* - * Save all registers in pt_regs, and switch GS if needed. + * Switch GS and CR3 if needed. */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace -- cgit v1.2.3 From c64cc2802a784ecfd25d39945e57e7a147854a5b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:51 +0800 Subject: x86/entry: Move CLD to the start of the idtentry macro Move it after CLAC. 
Suggested-by: Peter Zijlstra Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-5-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b1cef3b0a7ab..ab6ab6d3dab5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -371,6 +371,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 ENDBR ASM_CLAC + cld .if \has_error_code == 0 pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -439,6 +440,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS ENDBR ASM_CLAC + cld pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -495,6 +497,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS ENDBR ASM_CLAC + cld /* * If the entry is from userspace, switch stacks and treat it as @@ -557,6 +560,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 ENDBR ASM_CLAC + cld /* paranoid_entry returns GS information for paranoid_exit in EBX. */ call paranoid_entry @@ -882,7 +886,6 @@ SYM_CODE_END(xen_failsafe_callback) */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC - cld PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -1000,7 +1003,6 @@ SYM_CODE_END(paranoid_exit) */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC - cld testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1134,6 +1136,7 @@ SYM_CODE_START(asm_exc_nmi) */ ASM_CLAC + cld /* Use %rdx as our temp variable throughout */ pushq %rdx @@ -1153,7 +1156,6 @@ SYM_CODE_START(asm_exc_nmi) */ swapgs - cld FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx -- cgit v1.2.3 From 64cbd0acb58203fb769ed2f4eab526d43e243847 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:06 +0800 Subject: x86/entry: Don't call error_entry() for XENPV XENPV guests enter already on the task stack and they can't fault for native_iret() nor native_load_gs_index() since they use their own pvop for IRET and load_gs_index(). A CR3 switch is not needed either. So there is no reason to call error_entry() in XENPV. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ab6ab6d3dab5..062aa9d95961 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -336,8 +336,17 @@ SYM_CODE_END(push_and_clear_regs) call push_and_clear_regs UNWIND_HINT_REGS - call error_entry - movq %rax, %rsp /* switch to the task stack if from userspace */ + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). 
+ */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "", X86_FEATURE_XENPV + ENCODE_FRAME_POINTER UNWIND_HINT_REGS -- cgit v1.2.3 From c89191ce67efa4e5353db6a67f7287c28e673740 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:07 +0800 Subject: x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS XENPV doesn't use swapgs_restore_regs_and_return_to_usermode(), error_entry() and the code between entry_SYSENTER_compat() and entry_SYSENTER_compat_after_hwframe. Change the PV-compatible SWAPGS to the ASM instruction swapgs in these places. Also remove the definition of SWAPGS since no more users. Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-7-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/entry/entry_64_compat.S | 2 +- arch/x86/include/asm/irqflags.h | 8 -------- 3 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 062aa9d95961..312186612f4e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1019,7 +1019,7 @@ SYM_CODE_START_LOCAL(error_entry) * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax @@ -1051,7 +1051,7 @@ SYM_CODE_START_LOCAL(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - SWAPGS + swapgs /* * Issue an LFENCE to prevent GS speculation, regardless of whether it is a @@ -1072,7 +1072,7 @@ SYM_CODE_START_LOCAL(error_entry) * We came from an IRET to user mode, so we have user * gsbase and CR3. Switch to kernel gsbase and CR3: */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4fdb007cddbd..c5aeb0819707 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat) UNWIND_HINT_EMPTY ENDBR /* Interrupts are off on entry. */ - SWAPGS + swapgs pushq %rax SWITCH_TO_KERNEL_CR3 scratch_reg=%rax diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 111104d1c2cd..7793e52d6237 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -137,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } -#else -#ifdef CONFIG_X86_64 -#ifdef CONFIG_XEN_PV -#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV -#else -#define SWAPGS swapgs -#endif -#endif #endif /* !__ASSEMBLY__ */ #endif -- cgit v1.2.3 From d205222eb6a8e5e70c21200beb81c6e19ec211d6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 May 2022 14:14:33 +0200 Subject: x86/entry: Simplify entry_INT80_compat() Instead of playing silly games with rdi, use rax for simpler and more consistent code. 
Signed-off-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.221072885@infradead.org --- arch/x86/entry/entry_64_compat.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index c5aeb0819707..d743eaa19d9b 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -362,26 +362,25 @@ SYM_CODE_START(entry_INT80_compat) /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* In the Xen PV case we already run on the thread stack. */ ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - movq %rsp, %rdi + movq %rsp, %rax movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - pushq 6*8(%rdi) /* regs->ss */ - pushq 5*8(%rdi) /* regs->rsp */ - pushq 4*8(%rdi) /* regs->eflags */ - pushq 3*8(%rdi) /* regs->cs */ - pushq 2*8(%rdi) /* regs->ip */ - pushq 1*8(%rdi) /* regs->orig_ax */ - pushq (%rdi) /* pt_regs->di */ + pushq 5*8(%rax) /* regs->ss */ + pushq 4*8(%rax) /* regs->rsp */ + pushq 3*8(%rax) /* regs->eflags */ + pushq 2*8(%rax) /* regs->cs */ + pushq 1*8(%rax) /* regs->ip */ + pushq 0*8(%rax) /* regs->orig_ax */ .Lint80_keep_stack: + pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ -- cgit v1.2.3 From 8c42819b61b8340cff0643e65b5ce6a4144ab155 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 May 2022 14:14:34 +0200 Subject: x86/entry: Use PUSH_AND_CLEAR_REGS for compat Since the upper regs don't exist for ia32 code, preserving them doesn't hurt and it simplifies the code. This doesn't add any attack surface that would not already be available through INT80. Notably: - 32bit SYSENTER: didn't clear si, dx, cx. - 32bit SYSCALL, INT80: *do* clear si since the C functions don't take a second argument. - 64bit: didn't clear si since the C functions take a second argument; except the error_entry path might have only one argument, so clearing si was missing here. 32b SYSENTER should be clearing all those 3 registers, nothing uses them and selftests pass. Unconditionally clear rsi since it simplifies code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.293889636@infradead.org --- arch/x86/entry/calling.h | 1 + arch/x86/entry/entry_64_compat.S | 87 ++-------------------------------------- 2 files changed, 4 insertions(+), 84 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a4c061fb7c6e..debbe94aa3db 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -99,6 +99,7 @@ For 32-bit we have the following conventions - kernel is built with * well before they could be put to use in a speculative execution * gadget. 
*/ + xorl %esi, %esi /* nospec si */ xorl %edx, %edx /* nospec dx */ xorl %ecx, %ecx /* nospec cx */ xorl %r8d, %r8d /* nospec r8 */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d743eaa19d9b..ed2be3615b50 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ - xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ - xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rax=$-ENOSYS UNWIND_HINT_REGS cld @@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax /* discard orig_ax high bits */ pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - xorl %esi, %esi /* nospec si */ - pushq %rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ - pushq %rbp /* pt_regs->cx (stashed in bp) */ - xorl %ecx, %ecx /* nospec cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ - xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ - xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rax=$-ENOSYS UNWIND_HINT_REGS movq %rsp, %rdi @@ -380,35 +327,7 @@ SYM_CODE_START(entry_INT80_compat) pushq 0*8(%rax) /* regs->orig_ax */ .Lint80_keep_stack: - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - xorl %esi, %esi /* nospec si */ - pushq %rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ - pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq %r10 /* pt_regs->r10*/ - xorl %r10d, %r10d /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp */ - pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq %r13 /* 
pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rax=$-ENOSYS UNWIND_HINT_REGS cld -- cgit v1.2.3 From 1b331eeea7b8676fc5dbdf80d0a07e41be226177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 May 2022 14:14:35 +0200 Subject: x86/entry: Remove skip_r11rcx Yes, r11 and rcx have been restored previously, but since they're being popped anyway (into rsi) might as well pop them into their own regs -- setting them to the value they already are. Less magical code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org --- arch/x86/entry/calling.h | 10 +--------- arch/x86/entry/entry_64.S | 3 +-- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index debbe94aa3db..a97cc78ecb92 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -120,27 +120,19 @@ For 32-bit we have the following conventions - kernel is built with CLEAR_REGS .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 312186612f4e..3a1e3f215617 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. -- cgit v1.2.3 From 036c07c0c3b8a57d5c96e1f2aab62da0056f8f21 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 19 May 2022 11:46:58 -0700 Subject: x86/entry: Fix register corruption in compat syscall A panic was reported in the init process on AMD: Run /sbin/init as init process init[1]: segfault at f7fd5ca0 ip 00000000f7f5bbc7 sp 00000000ffa06aa0 error 7 in libc.so[f7f51000+4e000] Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b CPU: 1 PID: 1 Comm: init Tainted: G W 5.18.0-rc7-next-20220519 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d panic+0x10f/0x28d do_exit.cold+0x18/0x48 do_group_exit+0x2e/0xb0 get_signal+0xb6d/0xb80 arch_do_signal_or_restart+0x31/0x760 ? show_opcodes.cold+0x1c/0x21 ? 
force_sig_fault+0x49/0x70 exit_to_user_mode_prepare+0x131/0x1a0 irqentry_exit_to_user_mode+0x5/0x30 asm_exc_page_fault+0x27/0x30 RIP: 0023:0xf7f5bbc7 Code: 8a 44 24 10 88 41 ff 8b 44 24 10 83 c4 2c 5b 5e 5f 5d c3 53 83 ec 08 8b 5c 24 10 81 fb 00 f0 ff ff 76 0c e8 ba dc ff ff f7 db <89> 18 83 cb ff 83 c4 08 89 d8 5b c3 e8 81 60 ff ff 05 28 84 07 00 RSP: 002b:00000000ffa06aa0 EFLAGS: 00000217 RAX: 00000000f7fd5ca0 RBX: 000000000000000c RCX: 0000000000001000 RDX: 0000000000000001 RSI: 00000000f7fd5b60 RDI: 00000000f7fd5b60 RBP: 00000000f7fd1c1c R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The task's CX register got corrupted by commit 8c42819b61b8 ("x86/entry: Use PUSH_AND_CLEAR_REGS for compat"), which overlooked the fact that compat SYSCALL apparently stores the user's CX value in BP. Before that commit, CX was saved from its stashed value in BP: pushq %rbp /* pt_regs->cx (stashed in bp) */ But then it got changed to: pushq %rcx /* pt_regs->cx */ So the wrong value got saved and later restored back to the user. Fix it by pushing the correct value again (BP) for regs->cx. Fixes: 8c42819b61b8 ("x86/entry: Use PUSH_AND_CLEAR_REGS for compat") Reported-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Tested-by: Guenter Roeck Link: https://lkml.kernel.org/r/b5a26592c9dd60bbacdf97974a7433fd802a5593.1652985970.git.jpoimboe@kernel.org --- arch/x86/entry/calling.h | 8 ++++---- arch/x86/entry/entry_64_compat.S | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a97cc78ecb92..29b36e9e4e74 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0 +.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ + pushq \rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ pushq %r9 /* pt_regs->r9 */ @@ -115,8 +115,8 @@ For 32-bit we have the following conventions - kernel is built with .endm -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 + PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret CLEAR_REGS .endm diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index ed2be3615b50..f76e674d22c4 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -200,7 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax /* discard orig_ax high bits */ pushq %rax /* pt_regs->orig_ax */ - PUSH_AND_CLEAR_REGS rax=$-ENOSYS + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS UNWIND_HINT_REGS movq %rsp, %rdi -- cgit v1.2.3
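The last fix works because PUSH_AND_CLEAR_REGS (via PUSH_REGS in calling.h) takes GAS .macro keyword parameters with per-register defaults, so the compat SYSCALL path can substitute %rbp, where the user's CX value is stashed, into the pt_regs->cx slot without changing any other caller. Below is a minimal, stand-alone user-space sketch of that parameter-substitution mechanism; it is not kernel code, the file name demo.s, the PUSH_THREE_REGS name, the _start label and the register values are invented for illustration, and it assumes only a stock x86-64 Linux binutils toolchain (as demo.s -o demo.o && ld demo.o -o demo):

	# Illustrative only -- not kernel code.  A toy PUSH_REGS-style macro whose
	# "cx" slot can be filled from a different register, mirroring the
	# PUSH_AND_CLEAR_REGS rcx=%rbp usage in the compat SYSCALL fix above.

		.macro	PUSH_THREE_REGS rdx=%rdx rcx=%rcx rax=%rax
		pushq	\rdx			/* "pt_regs->dx" slot */
		pushq	\rcx			/* "pt_regs->cx" slot (may be another reg) */
		pushq	\rax			/* "pt_regs->ax" slot */
		.endm

		.text
		.globl	_start
	_start:
		movq	$1, %rdx
		movq	$2, %rbp		/* pretend user CX was stashed in BP */
		movq	$3, %rax

		PUSH_THREE_REGS rcx=%rbp	/* cx slot filled from %rbp, not %rcx */

		popq	%rdi			/* ax slot: 3 */
		popq	%rdi			/* cx slot: 2, the value stashed in BP */
		popq	%rdi			/* dx slot: 1 */

		xorl	%edi, %edi		/* exit status 0 */
		movl	$60, %eax		/* __NR_exit */
		syscall

Because every parameter defaults to its own register (rdx=%rdx, rcx=%rcx, rax=%rax), call sites that pass nothing are unaffected; only the path that needs the substitution spells it out, which is how entry_SYSCALL_compat_after_hwframe invokes PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS in the patch above.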