diff options
Diffstat (limited to 'arch')
28 files changed, 224 insertions, 85 deletions
diff --git a/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts b/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts index dace8ffeb991..0a4ffd10c484 100644 --- a/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts +++ b/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts @@ -581,7 +581,7 @@ * EBI2. This has a 25MHz chrystal next to it, so no * clocking is needed. */ - ethernet-ebi2@2,0 { + ethernet@2,0 { compatible = "smsc,lan9221", "smsc,lan9115"; reg = <2 0x0 0x100>; /* @@ -598,8 +598,6 @@ phy-mode = "mii"; reg-io-width = <2>; smsc,force-external-phy; - /* IRQ on edge falling = active low */ - smsc,irq-active-low; smsc,irq-push-pull; /* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e07e7de9ac49..b5b13a932561 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1605,7 +1605,8 @@ config ARM64_BTI_KERNEL depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 - depends on !(CC_IS_CLANG && GCOV_KERNEL) + # https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help Build the kernel with Branch Target Identification annotations diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a9c0716e7440..a074459f8f2f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -47,7 +47,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index 99ad77df8f52..97ddc6c203b7 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -10,6 +10,7 @@ #include <linux/cpumask.h> +#include <asm/smp.h> #include <asm/types.h> struct mpidr_hash { diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cce308586fcc..3f1490bfb938 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,7 @@ CFLAGS_syscall.o += -fno-stack-protector # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. -KCOV_INSTRUMENT_entry.o := n +KCOV_INSTRUMENT_entry-common.o := n KCOV_INSTRUMENT_idle.o := n # Object file lists. diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 125d5c9471ac..0ead8bfedf20 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -81,6 +81,7 @@ #include <asm/mmu_context.h> #include <asm/mte.h> #include <asm/processor.h> +#include <asm/smp.h> #include <asm/sysreg.h> #include <asm/traps.h> #include <asm/virt.h> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 12ce14a98b7c..db8b2e2d02c2 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -604,7 +604,7 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs) __el0_fiq_handler_common(regs); } -static void __el0_error_handler_common(struct pt_regs *regs) +static void noinstr __el0_error_handler_common(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 69b3fde8759e..36f51b0e438a 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -193,18 +193,6 @@ void mte_check_tfsr_el1(void) } #endif -static void update_gcr_el1_excl(u64 excl) -{ - - /* - * Note that the mask controlled by the user via prctl() is an - * include while GCR_EL1 accepts an exclude mask. - * No need for ISB since this only affects EL0 currently, implicit - * with ERET. - */ - sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); -} - static void set_gcr_el1_excl(u64 excl) { current->thread.gcr_user_excl = excl; @@ -265,7 +253,8 @@ void mte_suspend_exit(void) if (!system_supports_mte()) return; - update_gcr_el1_excl(gcr_kernel_excl); + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl); + isb(); } long set_mte_ctrl(struct task_struct *task, unsigned long arg) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 95cd62d67371..2cf999e41d30 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -29,7 +29,7 @@ .endm .macro ldrh1 reg, ptr, val - user_ldst 9998f, ldtrh, \reg, \ptr, \val + user_ldst 9997f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val @@ -37,7 +37,7 @@ .endm .macro ldr1 reg, ptr, val - user_ldst 9998f, ldtr, \reg, \ptr, \val + user_ldst 9997f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val @@ -45,7 +45,7 @@ .endm .macro ldp1 reg1, reg2, ptr, val - user_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val @@ -53,8 +53,10 @@ .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 // Nothing to copy ret @@ -63,6 +65,11 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) + strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 1f61cd0df062..dbea3799c3ef 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -30,33 +30,34 @@ .endm .macro ldrh1 reg, ptr, val - user_ldst 9998f, ldtrh, \reg, \ptr, \val + user_ldst 9997f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val - user_ldst 9998f, sttrh, \reg, \ptr, \val + user_ldst 9997f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val - user_ldst 9998f, ldtr, \reg, \ptr, \val + user_ldst 9997f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val - user_ldst 9998f, sttr, \reg, \ptr, \val + user_ldst 9997f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val - user_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val - user_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 - +srcin .req x15 SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -65,6 +66,12 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 043da90f5dd7..9f380eecf653 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -32,7 +32,7 @@ .endm .macro strh1 reg, ptr, val - user_ldst 9998f, sttrh, \reg, \ptr, \val + user_ldst 9997f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val @@ -40,7 +40,7 @@ .endm .macro str1 reg, ptr, val - user_ldst 9998f, sttr, \reg, \ptr, \val + user_ldst 9997f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val @@ -48,12 +48,14 @@ .endm .macro stp1 reg1, reg2, ptr, val - user_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -62,6 +64,12 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder + ldrb tmp1w, [srcin] +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 35fbdb7d6e1a..1648790e91b3 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/mte-def.h> /* Assumptions: * @@ -42,7 +43,16 @@ #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 +/* + * When KASAN_HW_TAGS is in use, memory is checked at MTE_GRANULE_SIZE + * (16-byte) granularity, and we must ensure that no access straddles this + * alignment boundary. + */ +#ifdef CONFIG_KASAN_HW_TAGS +#define MIN_PAGE_SIZE MTE_GRANULE_SIZE +#else #define MIN_PAGE_SIZE 4096 +#endif /* Since strings are short on average, we check the first 16 bytes of the string for a NUL character. In order to do an unaligned ldp diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 08f9dd6903b7..86310d6e1035 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -76,7 +76,7 @@ static inline int __enable_fpu(enum fpu_mode mode) /* we only have a 32-bit FPU */ return SIGFPE; #endif - fallthrough; + /* fallthrough */ case FPU_32BIT: if (cpu_has_fre) { /* clear FRE */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index cd4afcdf3725..9adad24c2e65 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1383,6 +1383,7 @@ static void build_r4000_tlb_refill_handler(void) switch (boot_cpu_type()) { default: if (sizeof(long) == 4) { + fallthrough; case CPU_LOONGSON2EF: /* Loongson2 ebase is different than r4k, we have more space */ if ((p - tlb_handler) > 64) @@ -2169,6 +2170,7 @@ static void build_r4000_tlb_load_handler(void) default: if (cpu_has_mips_r2_exec_hazard) { uasm_i_ehb(&p); + fallthrough; case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdfea6d6ab69..3256a316e884 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -146,6 +146,7 @@ static inline void psurge_clr_ipi(int cpu) switch(psurge_type) { case PSURGE_DUAL: out_8(psurge_sec_intr, ~0); + break; case PSURGE_NONE: break; default: diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bbf8622bbf5d..bd3ef121c379 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -126,6 +126,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, case DIE_SSTEP: if (uprobe_post_sstep_notifier(regs)) return NOTIFY_STOP; + break; default: break; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c42613cfb5ba..739be5da3bca 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.anythread_deprecated = 1; + if (cap.version) + edx.split.anythread_deprecated = 1; edx.split.reserved1 = 0; edx.split.reserved2 = 0; @@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); unsigned phys_as = entry->eax & 0xff; - if (!g_phys_as) + /* + * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as + * the guest operates in the same PA space as the host, i.e. + * reductions in MAXPHYADDR for memory encryption affect shadow + * paging, too. + * + * If TDP is enabled but an explicit guest MAXPHYADDR is not + * provided, use the raw bare metal MAXPHYADDR as reductions to + * the HPAs do not affect GPAs. + */ + if (!tdp_enabled) + g_phys_as = boot_cpu_data.x86_phys_bits; + else if (!g_phys_as) g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); @@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 0x8000001a: case 0x8000001e: break; - /* Support memory encryption cpuid if host supports it */ case 0x8000001F: - if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) + if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; - else + } else { cpuid_entry_override(entry, CPUID_8000_001F_EAX); + + /* + * Enumerate '0' for "PA bits reduction", the adjusted + * MAXPHYADDR is enumerated directly (see 0x80000008). + */ + entry->ebx &= ~GENMASK(11, 6); + } break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 845d114ae075..66f7f5bc3482 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -53,6 +53,8 @@ #include <asm/kvm_page_track.h> #include "trace.h" +#include "paging.h" + extern bool itlb_multihit_kvm_mitigation; int __read_mostly nx_huge_pages = -1; diff --git a/arch/x86/kvm/mmu/paging.h b/arch/x86/kvm/mmu/paging.h new file mode 100644 index 000000000000..de8ab323bb70 --- /dev/null +++ b/arch/x86/kvm/mmu/paging.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Shadow paging constants/helpers that don't need to be #undef'd. */ +#ifndef __KVM_X86_PAGING_H +#define __KVM_X86_PAGING_H + +#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) +#define PT64_LVL_ADDR_MASK(level) \ + (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ + * PT64_LEVEL_BITS))) - 1)) +#define PT64_LVL_OFFSET_MASK(level) \ + (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ + * PT64_LEVEL_BITS))) - 1)) +#endif /* __KVM_X86_PAGING_H */ + diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 490a028ddabe..ee044d357b5f 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -24,7 +24,7 @@ #define pt_element_t u64 #define guest_walker guest_walker64 #define FNAME(name) paging##64_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) @@ -57,7 +57,7 @@ #define pt_element_t u64 #define guest_walker guest_walkerEPT #define FNAME(name) ept_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 7a5ce9314107..eb7b227fc6cf 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); #else #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #endif -#define PT64_LVL_ADDR_MASK(level) \ - (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) -#define PT64_LVL_OFFSET_MASK(level) \ - (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ | shadow_x_mask | shadow_nx_mask | shadow_me_mask) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 21d03e3a5dfd..3bd09c50c98b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm) for (i = 0; i < MAX_INTERCEPT; i++) c->intercepts[i] |= g->intercepts[i]; + + /* If SMI is not intercepted, ignore guest SMI intercept as well */ + if (!intercept_smi) + vmcb_clr_intercept(c, INTERCEPT_SMI); } static void copy_vmcb_control_area(struct vmcb_control_area *dst, @@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, return true; } -static void nested_load_control_from_vmcb12(struct vcpu_svm *svm, - struct vmcb_control_area *control) +void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control) { copy_vmcb_control_area(&svm->nested.ctl, control); @@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) struct kvm_host_map map; u64 vmcb12_gpa; + if (!svm->nested.hsave_msr) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (is_smm(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -692,6 +701,27 @@ out: return ret; } +/* Copy state save area fields which are handled by VMRUN */ +void svm_copy_vmrun_state(struct vmcb_save_area *from_save, + struct vmcb_save_area *to_save) +{ + to_save->es = from_save->es; + to_save->cs = from_save->cs; + to_save->ss = from_save->ss; + to_save->ds = from_save->ds; + to_save->gdtr = from_save->gdtr; + to_save->idtr = from_save->idtr; + to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED; + to_save->efer = from_save->efer; + to_save->cr0 = from_save->cr0; + to_save->cr3 = from_save->cr3; + to_save->cr4 = from_save->cr4; + to_save->rax = from_save->rax; + to_save->rsp = from_save->rsp; + to_save->rip = from_save->rip; + to_save->cpl = 0; +} + void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; @@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm->vmcb01.ptr->save.es = save->es; - svm->vmcb01.ptr->save.cs = save->cs; - svm->vmcb01.ptr->save.ss = save->ss; - svm->vmcb01.ptr->save.ds = save->ds; - svm->vmcb01.ptr->save.gdtr = save->gdtr; - svm->vmcb01.ptr->save.idtr = save->idtr; - svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED; - svm->vmcb01.ptr->save.efer = save->efer; - svm->vmcb01.ptr->save.cr0 = save->cr0; - svm->vmcb01.ptr->save.cr3 = save->cr3; - svm->vmcb01.ptr->save.cr4 = save->cr4; - svm->vmcb01.ptr->save.rax = save->rax; - svm->vmcb01.ptr->save.rsp = save->rsp; - svm->vmcb01.ptr->save.rip = save->rip; - svm->vmcb01.ptr->save.cpl = 0; - + svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm); - kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; out_free: diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 62926f1a5f7b..6710d9ee2e4b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Pin guest memory */ guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, PAGE_SIZE, &n, 0); - if (!guest_page) - return -EFAULT; + if (IS_ERR(guest_page)) + return PTR_ERR(guest_page); /* allocate memory for header and transport buffer */ ret = -ENOMEM; @@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) } /* Copy packet header to userspace. */ - ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, - params.hdr_len); + if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, + params.hdr_len)) + ret = -EFAULT; e_free_trans_data: kfree(trans_data); @@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) data.trans_len = params.trans_len; /* Pin guest memory */ - ret = -EFAULT; guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, PAGE_SIZE, &n, 0); - if (!guest_page) + if (IS_ERR(guest_page)) { + ret = PTR_ERR(guest_page); goto e_free_trans; + } /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8834822c00cd..664d20f0689c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -198,6 +198,11 @@ module_param(avic, bool, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); + +bool intercept_smi = true; +module_param(intercept_smi, bool, 0444); + + static bool svm_gp_erratum_intercept = true; static u8 rsm_ins_bytes[] = "\x0f\xaa"; @@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_intercept(svm, INTERCEPT_INTR); svm_set_intercept(svm, INTERCEPT_NMI); - svm_set_intercept(svm, INTERCEPT_SMI); + + if (intercept_smi) + svm_set_intercept(svm, INTERCEPT_SMI); + svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0); svm_set_intercept(svm, INTERCEPT_RDPMC); svm_set_intercept(svm, INTERCEPT_CPUID); @@ -1923,7 +1931,7 @@ static int npf_interception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); + u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; trace_kvm_page_fault(fault_address, error_code); @@ -2106,6 +2114,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu) return 1; } +static int smi_interception(struct kvm_vcpu *vcpu) +{ + return 1; +} + static int intr_interception(struct kvm_vcpu *vcpu) { ++vcpu->stat.irq_exits; @@ -2941,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm_disable_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: - svm->nested.hsave_msr = data; + /* + * Old kernels did not validate the value written to + * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid + * value to allow live migrating buggy or malicious guests + * originating from those kernels. + */ + if (!msr->host_initiated && !page_address_valid(vcpu, data)) + return 1; + + svm->nested.hsave_msr = data & PAGE_MASK; break; case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); @@ -3080,8 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, - [SVM_EXIT_SMI] = kvm_emulate_as_nop, - [SVM_EXIT_INIT] = kvm_emulate_as_nop, + [SVM_EXIT_SMI] = smi_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc, [SVM_EXIT_CPUID] = kvm_emulate_cpuid, @@ -4288,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map_save; int ret; if (is_guest_mode(vcpu)) { @@ -4303,6 +4325,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ret = nested_svm_vmexit(svm); if (ret) return ret; + + /* + * KVM uses VMCB01 to store L1 host state while L2 runs but + * VMCB01 is going to be used during SMM and thus the state will + * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save + * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the + * format of the area is identical to guest save area offsetted + * by 0x400 (matches the offset of 'struct vmcb_save_area' + * within 'struct vmcb'). Note: HSAVE area may also be used by + * L1 hypervisor to save additional host context (e.g. KVM does + * that, see svm_prepare_guest_switch()) which must be + * preserved. + */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), + &map_save) == -EINVAL) + return 1; + + BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); + + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); + + kvm_vcpu_unmap(vcpu, &map_save, true); } return 0; } @@ -4310,13 +4355,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_host_map map; + struct kvm_host_map map, map_save; int ret = 0; if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); + struct vmcb *vmcb12; if (guest) { if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) @@ -4332,8 +4378,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (svm_allocate_nested(svm)) return 1; - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva); + vmcb12 = map.hva; + + nested_load_control_from_vmcb12(svm, &vmcb12->control); + + ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12); kvm_vcpu_unmap(vcpu, &map, true); + + /* + * Restore L1 host state from L1 HSAVE area as VMCB01 was + * used during SMM (see svm_enter_smm()) + */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), + &map_save) == -EINVAL) + return 1; + + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); + + kvm_vcpu_unmap(vcpu, &map_save, true); } } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f89b623bb591..7e2090752d8f 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -31,6 +31,7 @@ #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; +extern bool intercept_smi; /* * Clean bits in VMCB. @@ -463,6 +464,8 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); +void svm_copy_vmrun_state(struct vmcb_save_area *from_save, + struct vmcb_save_area *to_save); void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); @@ -479,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); +void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control); void nested_sync_control_from_vmcb02(struct vcpu_svm *svm); void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm); void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 3979a947933a..db88ed4f2121 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,8 +14,6 @@ #include "vmx_ops.h" #include "cpuid.h" -extern const u32 vmx_msr_index[]; - #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6dc1b445231..a4fd10604f72 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9601,6 +9601,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; + } else if (unlikely(hw_breakpoint_active())) { + set_debugreg(0, 7); } for (;;) { @@ -10985,9 +10987,6 @@ int kvm_arch_hardware_setup(void *opaque) int r; rdmsrl_safe(MSR_EFER, &host_efer); - if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) && - !(host_efer & EFER_NX))) - return -EIO; if (boot_cpu_has(X86_FEATURE_XSAVES)) rdmsrl(MSR_IA32_XSS, host_xss); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e835164189f1..4b951458c9fc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) for (i = 0; i < prog->aux->size_poke_tab; i++) { poke = &prog->aux->poke_tab[i]; + if (poke->aux && poke->aux != prog->aux) + continue; + WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); if (poke->reason != BPF_POKE_REASON_TAIL_CALL) |