From 9f98ddd6686cc9469fb73b11ddd403271d65cbdf Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:39 +0100 Subject: kvm: arm64: Add helper for loading the stage2 setting for a VM We load the stage2 context of a guest for different operations, including running the guest and tlb maintenance on behalf of the guest. As of now only the vttbr is private to the guest, but this is about to change with IPA per VM. Add a helper to load the stage2 configuration for a VM, which could do the right thing with the future changes. Cc: Christoffer Dall Cc: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 2 +- arch/arm64/kvm/hyp/tlb.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ca46153d7915..9d5ce1a3039a 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void) static void __hyp_text __activate_vm(struct kvm *kvm) { - write_sysreg(kvm->arch.vttbr, vttbr_el2); + __load_guest_stage2(kvm); } static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 131c7772703c..4dbd9c69a96d 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so * let's flip TGE before executing the TLB operation. */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); + __load_guest_stage2(kvm); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); @@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) { - write_sysreg(kvm->arch.vttbr, vttbr_el2); + __load_guest_stage2(kvm); isb(); } -- cgit v1.2.3 From b2df44ffba363bd90274340e0adfd8137f5cd878 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:41 +0100 Subject: kvm: arm64: Clean up VTCR_EL2 initialisation Use the new helper for converting the parange to the physical shift. Also, add the missing definitions for the VTCR_EL2 register fields and use them instead of hard coding numbers. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 3 +++ arch/arm64/kvm/hyp/s2-setup.c | 34 ++++++++-------------------------- 2 files changed, 11 insertions(+), 26 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..5f807b680a5f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,6 +107,7 @@ #define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) +#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK #define VTCR_EL2_TG0_MASK TCR_TG0_MASK #define VTCR_EL2_TG0_4K TCR_TG0_4K @@ -127,6 +128,8 @@ #define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT) #define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT) +#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x) + /* * We configure the Stage-2 page tables to always restrict the IPA space to be * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index 603e1ee83e89..e1ca672e937a 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -19,45 +19,27 @@ #include #include #include +#include u32 __hyp_text __init_stage2_translation(void) { u64 val = VTCR_EL2_FLAGS; u64 parange; + u32 phys_shift; u64 tmp; /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS - * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while - * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... + * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, but the + * allocated values are limited to 3bits. */ parange = read_sysreg(id_aa64mmfr0_el1) & 7; if (parange > ID_AA64MMFR0_PARANGE_MAX) parange = ID_AA64MMFR0_PARANGE_MAX; - val |= parange << 16; + val |= parange << VTCR_EL2_PS_SHIFT; /* Compute the actual PARange... */ - switch (parange) { - case 0: - parange = 32; - break; - case 1: - parange = 36; - break; - case 2: - parange = 40; - break; - case 3: - parange = 42; - break; - case 4: - parange = 44; - break; - case 5: - default: - parange = 48; - break; - } + phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); /* * ... and clamp it to 40 bits, unless we have some braindead @@ -65,7 +47,7 @@ u32 __hyp_text __init_stage2_translation(void) * return that value for the rest of the kernel to decide what * to do. */ - val |= 64 - (parange > 40 ? 40 : parange); + val |= VTCR_EL2_T0SZ(phys_shift > 40 ? 40 : phys_shift); /* * Check the availability of Hardware Access Flag / Dirty Bit @@ -86,5 +68,5 @@ u32 __hyp_text __init_stage2_translation(void) write_sysreg(val, vtcr_el2); - return parange; + return phys_shift; } -- cgit v1.2.3 From 5b6c6742b5350a6fb5c631fb99a6bc046a62739c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:42 +0100 Subject: kvm: arm/arm64: Allow arch specific configurations for VM Allow the arch backends to perform VM specific initialisation. This will be later used to handle IPA size configuration and per-VM VTCR configuration on arm64. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 7 +++++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/reset.c | 7 +++++++ virt/kvm/arm/arm.c | 5 +++-- 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3ad482d2f1eb..72d46418e1ef 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -354,4 +354,11 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + return 0; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d6d7336f871..b04280ae1be0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -513,4 +513,6 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +int kvm_arm_config_vm(struct kvm *kvm, unsigned long type); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e37c78bbe1ca..b0c07dab5cb3 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -133,3 +133,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu); } + +int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + return 0; +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..327d0fd28380 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - if (type) - return -EINVAL; + ret = kvm_arm_config_vm(kvm, type); + if (ret) + return ret; kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); if (!kvm->arch.last_vcpu_ran) -- cgit v1.2.3 From 7665f3a8491b0ed3c6f65c0bc3a5424ea8f87731 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:43 +0100 Subject: kvm: arm64: Configure VTCR_EL2 per VM Add support for setting the VTCR_EL2 per VM, rather than hard coding a value at boot time per CPU. This would allow us to tune the stage2 page table parameters per VM in the later changes. We compute the VTCR fields based on the system wide sanitised feature registers, except for the hardware management of Access Flags (VTCR_EL2.HA). It is fine to run a system with a mix of CPUs that may or may not update the page table Access Flags. Since the bit is RES0 on CPUs that don't support it, the bit should be ignored on them. Suggested-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/include/asm/kvm_host.h | 12 ++++--- arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/kvm/hyp/Makefile | 1 - arch/arm64/kvm/hyp/s2-setup.c | 72 --------------------------------------- arch/arm64/kvm/reset.c | 35 +++++++++++++++++++ 7 files changed, 45 insertions(+), 81 deletions(-) delete mode 100644 arch/arm64/kvm/hyp/s2-setup.c (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5f807b680a5f..14317b3a1820 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -135,8 +135,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time - * (see hyp-init.S). + * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_config_vm(). * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 102b5a5c47b6..0b53c72e7591 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -72,8 +72,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern u32 __init_stage2_translation(void); - /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ #define __hyp_this_cpu_ptr(sym) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b04280ae1be0..5ecd457bce7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -61,11 +61,13 @@ struct kvm_arch { u64 vmid_gen; u32 vmid; - /* 1-level 2nd stage table, protected by kvm->mmu_lock */ + /* stage2 entry level table */ pgd_t *pgd; /* VTTBR value associated with above pgd and vmid */ u64 vttbr; + /* VTCR_EL2 value for this VM */ + u64 vtcr; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -442,10 +444,12 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, static inline void __cpu_init_stage2(void) { - u32 parange = kvm_call_hyp(__init_stage2_translation); + u32 ps; - WARN_ONCE(parange < 40, - "PARange is %d bits, unsupported configuration!", parange); + /* Sanity check for minimum IPA size support */ + ps = id_aa64mmfr0_parange_to_phys_shift(read_sysreg(id_aa64mmfr0_el1) & 0x7); + WARN_ONCE(ps < 40, + "PARange is %d bits, unsupported configuration!", ps); } /* Guest/host FPSIMD coordination helpers */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index d1bd1e0f14d7..23aca66767f9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -161,6 +161,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...); */ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) { + write_sysreg(kvm->arch.vtcr, vtcr_el2); write_sysreg(kvm->arch.vttbr, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 2fabc2dc1966..82d1904328ad 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o -obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c deleted file mode 100644 index e1ca672e937a..000000000000 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2016 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -u32 __hyp_text __init_stage2_translation(void) -{ - u64 val = VTCR_EL2_FLAGS; - u64 parange; - u32 phys_shift; - u64 tmp; - - /* - * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS - * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, but the - * allocated values are limited to 3bits. - */ - parange = read_sysreg(id_aa64mmfr0_el1) & 7; - if (parange > ID_AA64MMFR0_PARANGE_MAX) - parange = ID_AA64MMFR0_PARANGE_MAX; - val |= parange << VTCR_EL2_PS_SHIFT; - - /* Compute the actual PARange... */ - phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - - /* - * ... and clamp it to 40 bits, unless we have some braindead - * HW that implements less than that. In all cases, we'll - * return that value for the rest of the kernel to decide what - * to do. - */ - val |= VTCR_EL2_T0SZ(phys_shift > 40 ? 40 : phys_shift); - - /* - * Check the availability of Hardware Access Flag / Dirty Bit - * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. - */ - tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; - if (tmp) - val |= VTCR_EL2_HA; - - /* - * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS - * bit in VTCR_EL2. - */ - tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf; - val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ? - VTCR_EL2_VS_16BIT : - VTCR_EL2_VS_8BIT; - - write_sysreg(val, vtcr_el2); - - return phys_shift; -} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index b0c07dab5cb3..616120c4176b 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -26,6 +26,7 @@ #include +#include #include #include #include @@ -134,9 +135,43 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return kvm_timer_vcpu_reset(vcpu); } +/* + * Configure the VTCR_EL2 for this VM. The VTCR value is common + * across all the physical CPUs on the system. We use system wide + * sanitised values to fill in different fields, except for Hardware + * Management of Access Flags. HA Flag is set unconditionally on + * all CPUs, as it is safe to run with or without the feature and + * the bit is RES0 on CPUs that don't support it. + */ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) { + u64 vtcr = VTCR_EL2_FLAGS; + u32 parange, phys_shift; + if (type) return -EINVAL; + + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; + if (parange > ID_AA64MMFR0_PARANGE_MAX) + parange = ID_AA64MMFR0_PARANGE_MAX; + vtcr |= parange << VTCR_EL2_PS_SHIFT; + + phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); + if (phys_shift > KVM_PHYS_SHIFT) + phys_shift = KVM_PHYS_SHIFT; + vtcr |= VTCR_EL2_T0SZ(phys_shift); + + /* + * Enable the Hardware Access Flag management, unconditionally + * on all CPUs. The features is RES0 on CPUs without the support + * and must be ignored by the CPUs. + */ + vtcr |= VTCR_EL2_HA; + + /* Set the vmid bits */ + vtcr |= (kvm_get_vmid_bits() == 16) ? + VTCR_EL2_VS_16BIT : + VTCR_EL2_VS_8BIT; + kvm->arch.vtcr = vtcr; return 0; } -- cgit v1.2.3 From 7e8130456e067f49693fdcc25b5ba242a9c5568b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:48 +0100 Subject: kvm: arm64: Configure VTCR_EL2.SL0 per VM VTCR_EL2 holds the following key stage2 translation table parameters: SL0 - Entry level in the page table lookup. T0SZ - Denotes the size of the memory addressed by the table. We have been using fixed values for the SL0 depending on the page size as we have a fixed IPA size. But since we are about to make it dynamic, we need to calculate the SL0 at runtime per VM. This patch adds a helper to compute the value of SL0 for a VM based on the IPA size. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 67 ++++++++++++++++++++++++++++------------ arch/arm64/kvm/reset.c | 1 + 2 files changed, 49 insertions(+), 19 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b236d90ca056..f913adb44f93 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -121,7 +121,6 @@ #define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA #define VTCR_EL2_SL0_SHIFT 6 #define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) -#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_VS_SHIFT 19 #define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT) @@ -144,30 +143,60 @@ #define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) -#ifdef CONFIG_ARM64_64K_PAGES /* - * Stage2 translation configuration: - * 64kB pages (TG0 = 1) - * 2 level page tables (SL = 1) + * VTCR_EL2:SL0 indicates the entry level for Stage2 translation. + * Interestingly, it depends on the page size. + * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a + * + * ----------------------------------------- + * | Entry level | 4K | 16K/64K | + * ------------------------------------------ + * | Level: 0 | 2 | - | + * ------------------------------------------ + * | Level: 1 | 1 | 2 | + * ------------------------------------------ + * | Level: 2 | 0 | 1 | + * ------------------------------------------ + * | Level: 3 | - | 0 | + * ------------------------------------------ + * + * The table roughly translates to : + * + * SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level + * + * Where TGRAN_SL0_BASE is a magic number depending on the page size: + * TGRAN_SL0_BASE(4K) = 2 + * TGRAN_SL0_BASE(16K) = 3 + * TGRAN_SL0_BASE(64K) = 3 + * provided we take care of ruling out the unsupported cases and + * Entry_Level = 4 - Number_of_levels. + * */ -#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) +#ifdef CONFIG_ARM64_64K_PAGES + +#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K +#define VTCR_EL2_TGRAN_SL0_BASE 3UL + #elif defined(CONFIG_ARM64_16K_PAGES) -/* - * Stage2 translation configuration: - * 16kB pages (TG0 = 2) - * 2 level page tables (SL = 1) - */ -#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1) + +#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K +#define VTCR_EL2_TGRAN_SL0_BASE 3UL + #else /* 4K */ -/* - * Stage2 translation configuration: - * 4kB pages (TG0 = 0) - * 3 level page tables (SL = 1) - */ -#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1) + +#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K +#define VTCR_EL2_TGRAN_SL0_BASE 2UL + #endif -#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) +#define VTCR_EL2_LVLS_TO_SL0(levels) \ + ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_TO_LVLS(sl0) \ + ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE) +#define VTCR_EL2_LVLS(vtcr) \ + VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT) + +#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN) /* * ARM VMSAv8-64 defines an algorithm for finding the translation table * descriptors in section D4.2.8 in ARM DDI 0487C.a. diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 616120c4176b..1ced1e37374e 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -160,6 +160,7 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) if (phys_shift > KVM_PHYS_SHIFT) phys_shift = KVM_PHYS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); + vtcr |= VTCR_EL2_LVLS_TO_SL0(kvm_stage2_levels(kvm)); /* * Enable the Hardware Access Flag management, unconditionally -- cgit v1.2.3 From 13ac4bbcc457d3925b4031cc70e3031fd8b9c3b7 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:49 +0100 Subject: kvm: arm64: Switch to per VM IPA limit Now that we can manage the stage2 page table per VM, switch the configuration details to per VM instance. The VTCR is updated with the values specific to the VM based on the configuration. We store the IPA size and the number of stage2 page table levels for the guest already in VTCR. Decode it back from the vtcr field wherever we need it. Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 2 ++ arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/include/asm/stage2_pgtable.h | 2 +- arch/arm64/kvm/reset.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index f913adb44f93..e4240568cc18 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -197,6 +197,8 @@ VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT) #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN) +#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK)) + /* * ARM VMSAv8-64 defines an algorithm for finding the translation table * descriptors in section D4.2.8 in ARM DDI 0487C.a. diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ac3ca9690bad..77b1af9e64db 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -142,7 +142,7 @@ static inline unsigned long __kern_hyp_va(unsigned long v) */ #define KVM_PHYS_SHIFT (40) -#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT +#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr) #define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) #define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 36a0a1165003..c62fe118a898 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -43,7 +43,7 @@ */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) #define STAGE2_PGTABLE_LEVELS stage2_pgtable_levels(KVM_PHYS_SHIFT) -#define kvm_stage2_levels(kvm) stage2_pgtable_levels(kvm_phys_shift(kvm)) +#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) /* * With all the supported VA_BITs and 40bit guest IPA, the following condition diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 1ced1e37374e..2bf41e007390 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -160,7 +160,7 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) if (phys_shift > KVM_PHYS_SHIFT) phys_shift = KVM_PHYS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); - vtcr |= VTCR_EL2_LVLS_TO_SL0(kvm_stage2_levels(kvm)); + vtcr |= VTCR_EL2_LVLS_TO_SL0(stage2_pgtable_levels(phys_shift)); /* * Enable the Hardware Access Flag management, unconditionally -- cgit v1.2.3 From bc1d7de8c550db2f8fd59458a07fefa863358b8d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:51 +0100 Subject: kvm: arm64: Add 52bit support for PAR to HPFAR conversoin Add support for handling 52bit addresses in PAR to HPFAR conversion. Instead of hardcoding the address limits, we now use PHYS_MASK_SHIFT. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 7 +++++++ arch/arm64/kvm/hyp/switch.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e4240568cc18..f1330284720d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -311,6 +311,13 @@ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) +/* + * We have + * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] + * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + */ +#define PAR_TO_HPFAR(par) \ + (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) #define kvm_arm_exception_type \ {0, "IRQ" }, \ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 9d5ce1a3039a..7cc175c88a37 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) return false; /* Translation failed, back to guest */ /* Convert PAR to HPFAR format */ - *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4; + *hpfar = PAR_TO_HPFAR(tmp); return true; } -- cgit v1.2.3 From 0f62f0e95be29200ab2ab98ca870e22c9b148dfa Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:52 +0100 Subject: kvm: arm64: Set a limit on the IPA size So far we have restricted the IPA size of the VM to the default value (40bits). Now that we can manage the IPA size per VM and support dynamic stage2 page tables, we can allow VMs to have larger IPA. This patch introduces a the maximum IPA size supported on the host. This is decided by the following factors : 1) Maximum PARange supported by the CPUs - This can be inferred from the system wide safe value. 2) Maximum PA size supported by the host kernel (48 vs 52) 3) Number of levels in the host page table (as we base our stage2 tables on the host table helpers). Since the stage2 page table code is dependent on the stage1 page table, we always ensure that : Number of Levels at Stage1 >= Number of Levels at Stage2 So we limit the IPA to make sure that the above condition is satisfied. This will affect the following combinations of VA_BITS and IPA for different page sizes. Host configuration | Unsupported IPA ranges 39bit VA, 4K | [44, 48] 36bit VA, 16K | [41, 48] 42bit VA, 64K | [47, 52] Supporting the above combinations need independent stage2 page table manipulation code, which would need substantial changes. We could purse the solution independently and switch the page table code once we have it ready. Cc: Catalin Marinas Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 12 +++-------- arch/arm64/kvm/reset.c | 43 +++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 2 ++ 4 files changed, 50 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 12ae5fbbcf01..5ad1a54f98dc 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -358,6 +358,8 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline void kvm_set_ipa_limit(void) {} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5ecd457bce7d..f008f8866b2a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -442,15 +442,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) -{ - u32 ps; - - /* Sanity check for minimum IPA size support */ - ps = id_aa64mmfr0_parange_to_phys_shift(read_sysreg(id_aa64mmfr0_el1) & 0x7); - WARN_ONCE(ps < 40, - "PARange is %d bits, unsupported configuration!", ps); -} +static inline void __cpu_init_stage2(void) {} /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); @@ -513,6 +505,8 @@ static inline int kvm_arm_have_ssbd(void) void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_set_ipa_limit(void); + #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 2bf41e007390..96b3f50101bc 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -34,6 +34,9 @@ #include #include +/* Maximum phys_shift supported for any VM on this host */ +static u32 kvm_ipa_limit; + /* * ARMv8 Reset Values */ @@ -135,6 +138,46 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return kvm_timer_vcpu_reset(vcpu); } +void kvm_set_ipa_limit(void) +{ + unsigned int ipa_max, pa_max, va_max, parange; + + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; + pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); + + /* Clamp the IPA limit to the PA size supported by the kernel */ + ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max; + /* + * Since our stage2 table is dependent on the stage1 page table code, + * we must always honor the following condition: + * + * Number of levels in Stage1 >= Number of levels in Stage2. + * + * So clamp the ipa limit further down to limit the number of levels. + * Since we can concatenate upto 16 tables at entry level, we could + * go upto 4bits above the maximum VA addressible with the current + * number of levels. + */ + va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; + va_max += 4; + + if (va_max < ipa_max) + ipa_max = va_max; + + /* + * If the final limit is lower than the real physical address + * limit of the CPUs, report the reason. + */ + if (ipa_max < pa_max) + pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n", + (va_max < pa_max) ? "Virtual" : "Physical"); + + WARN(ipa_max < KVM_PHYS_SHIFT, + "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); + kvm_ipa_limit = ipa_max; + kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); +} + /* * Configure the VTCR_EL2 for this VM. The VTCR value is common * across all the physical CPUs on the system. We use system wide diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 43e716bc3f08..631f9a3ad99a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1413,6 +1413,8 @@ static int init_common_resources(void) kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_set_ipa_limit(); + return 0; } -- cgit v1.2.3 From 58b3efc820acd3219e89ff014e93346a734229b8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:53 +0100 Subject: kvm: arm64: Limit the minimum number of page table levels Since we are about to remove the lower limit on the IPA size, make sure that we do not go to 1 level page table (e.g, with 32bit IPA on 64K host with concatenation) to avoid splitting the host PMD huge pages at stage2. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/stage2_pgtable.h | 7 ++++++- arch/arm64/kvm/reset.c | 10 +++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index c62fe118a898..2cce769ba4c6 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -72,8 +72,13 @@ /* * The number of PTRS across all concatenated stage2 tables given by the * number of bits resolved at the initial level. + * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA), + * in which case, stage2_pgd_ptrs will have one entry. */ -#define __s2_pgd_ptrs(ipa, lvls) (1 << ((ipa) - pt_levels_pgdir_shift((lvls)))) +#define pgd_ptrs_shift(ipa, pgdir_shift) \ + ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0) +#define __s2_pgd_ptrs(ipa, lvls) \ + (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls)))) #define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t)) #define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 96b3f50101bc..f156e45760bc 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -190,6 +190,7 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) { u64 vtcr = VTCR_EL2_FLAGS; u32 parange, phys_shift; + u8 lvls; if (type) return -EINVAL; @@ -203,7 +204,14 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) if (phys_shift > KVM_PHYS_SHIFT) phys_shift = KVM_PHYS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); - vtcr |= VTCR_EL2_LVLS_TO_SL0(stage2_pgtable_levels(phys_shift)); + /* + * Use a minimum 2 level page table to prevent splitting + * host PMD huge pages at stage2. + */ + lvls = stage2_pgtable_levels(phys_shift); + if (lvls < 2) + lvls = 2; + vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); /* * Enable the Hardware Access Flag management, unconditionally -- cgit v1.2.3 From 233a7cb235318223df8133235383f4c595c654c1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:54 +0100 Subject: kvm: arm64: Allow tuning the physical address size for VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow specifying the physical address size limit for a new VM via the kvm_type argument for the KVM_CREATE_VM ioctl. This allows us to finalise the stage2 page table as early as possible and hence perform the right checks on the memory slots without complication. The size is encoded as Log2(PA_Size) in bits[7:0] of the type field. For backward compatibility the value 0 is reserved and implies 40bits. Also, lift the limit of the IPA to host limit and allow lower IPA sizes (e.g, 32). The userspace could check the extension KVM_CAP_ARM_VM_IPA_SIZE for the availability of this feature. The cap check returns the maximum limit for the physical address shift supported by the host. Cc: Marc Zyngier Cc: Christoffer Dall Cc: Peter Maydell Cc: Paolo Bonzini Cc: Radim Krčmář Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 31 +++++++++++++++++++++++++++++++ arch/arm64/include/asm/stage2_pgtable.h | 20 -------------------- arch/arm64/kvm/reset.c | 17 +++++++++++++---- include/uapi/linux/kvm.h | 10 ++++++++++ 4 files changed, 54 insertions(+), 24 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 647f94128a85..f6b0af55d010 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -123,6 +123,37 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the flag KVM_VM_MIPS_VZ. +On arm64, the physical address size for a VM (IPA Size limit) is limited +to 40bits by default. The limit can be configured if the host supports the +extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use +KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type +identifier, where IPA_Bits is the maximum width of any physical +address used by the VM. The IPA_Bits is encoded in bits[7-0] of the +machine type identifier. + +e.g, to configure a guest to use 48bit physical address size : + + vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48)); + +The requested size (IPA_Bits) must be : + 0 - Implies default size, 40bits (for backward compatibility) + + or + + N - Implies N bits, where N is a positive integer such that, + 32 <= N <= Host_IPA_Limit + +Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and +is dependent on the CPU capability and the kernel configuration. The limit can +be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION +ioctl() at run-time. + +Please note that configuring the IPA size does not affect the capability +exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects +size of the address translated by the stage2 level (guest physical to +host physical address translations). + + 4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 2cce769ba4c6..d352f6df8d2c 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -42,28 +42,8 @@ * the range (IPA_SHIFT, IPA_SHIFT - 4). */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) -#define STAGE2_PGTABLE_LEVELS stage2_pgtable_levels(KVM_PHYS_SHIFT) #define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) -/* - * With all the supported VA_BITs and 40bit guest IPA, the following condition - * is always true: - * - * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS - * - * We base our stage-2 page table walker helpers on this assumption and - * fall back to using the host version of the helper wherever possible. - * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back - * to using the host version, since it is guaranteed it is not folded at host. - * - * If the condition breaks in the future, we can rearrange the host level - * definitions and reuse them for stage2. Till then... - */ -#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS -#error "Unsupported combination of guest IPA and host VA_BITS." -#endif - - /* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */ #define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm)) #define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm)) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f156e45760bc..95f28d5950e0 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -89,6 +89,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_EVENTS: r = 1; break; + case KVM_CAP_ARM_VM_IPA_SIZE: + r = kvm_ipa_limit; + break; default: r = 0; } @@ -192,17 +195,23 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) u32 parange, phys_shift; u8 lvls; - if (type) + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) return -EINVAL; + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (phys_shift) { + if (phys_shift > kvm_ipa_limit || + phys_shift < 32) + return -EINVAL; + } else { + phys_shift = KVM_PHYS_SHIFT; + } + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; if (parange > ID_AA64MMFR0_PARANGE_MAX) parange = ID_AA64MMFR0_PARANGE_MAX; vtcr |= parange << VTCR_EL2_PS_SHIFT; - phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - if (phys_shift > KVM_PHYS_SHIFT) - phys_shift = KVM_PHYS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); /* * Use a minimum 2 level page table to prevent splitting diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 251be353f950..95aa73ca65dc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -750,6 +750,15 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 +/* + * On arm64, machine type can be used to request the physical + * address size for the VM. Bits[7-0] are reserved for the guest + * PA size shift (i.e, log2(PA_Size)). For backward compatibility, + * value 0 implies the default IPA size, 40bits. + */ +#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL +#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) /* * ioctls for /dev/kvm fds: */ @@ -953,6 +962,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_ARM_VM_IPA_SIZE 160 /* returns maximum IPA bits for a VM */ #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From bca607ebc76af9540e4aad5b2241a7323354be43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 13:40:36 +0100 Subject: KVM: arm/arm64: Rename kvm_arm_config_vm to kvm_arm_setup_stage2 VM tends to be a very overloaded term in KVM, so let's keep it to describe the virtual machine. For the virtual memory setup, let's use the "stage2" suffix. Reviewed-by: Eric Auger Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 6 +++++- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/reset.c | 2 +- virt/kvm/arm/arm.c | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 72d46418e1ef..b45af481ccf7 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -354,8 +354,12 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); -static inline int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { + /* + * On 32bit ARM, VMs get a static 40bit IPA stage2 setup, + * so any non-zero value used as type is illegal. + */ if (type) return -EINVAL; return 0; diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index f1330284720d..6e324d1f1231 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -133,7 +133,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_config_vm(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f008f8866b2a..376a5b695467 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -511,6 +511,6 @@ void kvm_set_ipa_limit(void); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); -int kvm_arm_config_vm(struct kvm *kvm, unsigned long type); +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 95f28d5950e0..aa806d582552 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -189,7 +189,7 @@ void kvm_set_ipa_limit(void) * all CPUs, as it is safe to run with or without the feature and * the bit is RES0 on CPUs that don't support it. */ -int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { u64 vtcr = VTCR_EL2_FLAGS; u32 parange, phys_shift; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 631f9a3ad99a..91c464c9cd21 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,7 +120,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - ret = kvm_arm_config_vm(kvm, type); + ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; -- cgit v1.2.3 From f0725345e3e127032376e4fcb6b0fc893237fcef Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 9 Aug 2018 22:20:41 +0800 Subject: arm64: KVM: Remove some extra semicolon in kvm_target_cpu There are some extra semicolon in kvm_target_cpu, remove it. Signed-off-by: zhong jiang Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a74f84d09412 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -338,15 +338,15 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_CORTEX_A53; case ARM_CPU_PART_CORTEX_A57: return KVM_ARM_TARGET_CORTEX_A57; - }; + } break; case ARM_CPU_IMP_APM: switch (part_number) { case APM_CPU_PART_POTENZA: return KVM_ARM_TARGET_XGENE_POTENZA; - }; + } break; - }; + } /* Return a default generic target */ return KVM_ARM_TARGET_GENERIC_V8; -- cgit v1.2.3 From 375bdd3b5d4f7cf146f0df1488b4671b141dd799 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:48 +0800 Subject: arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension() Rename kvm_arch_dev_ioctl_check_extension() to kvm_arch_vm_ioctl_check_extension(), because it does not have any relationship with device. Renaming this function can make code readable. Cc: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/reset.c | 4 ++-- virt/kvm/arm/arm.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index b45af481ccf7..5ca5d9af0c26 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) +static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 376a5b695467..f84052f306af 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); +int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index aa806d582552..337d2fbc2f06 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -59,12 +59,12 @@ static bool cpu_has_32bit_el1(void) } /** - * kvm_arch_dev_ioctl_check_extension + * kvm_arch_vm_ioctl_check_extension * * We currently assume that the number of HW registers is uniform * across all CPUs (see cpuinfo_sanity_check). */ -int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) +int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4c5ff06b18f9..7c9d7b51ce89 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -241,7 +241,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; default: - r = kvm_arch_dev_ioctl_check_extension(kvm, ext); + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; } return r; -- cgit v1.2.3 From 58bf437ff64eac8aca606e42d7e4623e40b61fa1 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:49 +0800 Subject: arm/arm64: KVM: Enable 32 bits kvm vcpu events support The commit 539aee0edb9f ("KVM: arm64: Share the parts of get/set events useful to 32bit") shares the get/set events helper for arm64 and arm32, but forgot to share the cap extension code. User space will check whether KVM supports vcpu events by checking the KVM_CAP_VCPU_EVENTS extension Acked-by: James Morse Reviewed-by : Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 1 - virt/kvm/arm/arm.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 337d2fbc2f06..b72a3dd56204 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -86,7 +86,6 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: - case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_VM_IPA_SIZE: diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7c9d7b51ce89..11b98b2b0486 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -213,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: -- cgit v1.2.3 From e4e11cc0f81ee7be17d6f6fb96128a6d51c0e838 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 17 Oct 2018 20:21:16 +0200 Subject: KVM: arm64: Safety check PSTATE when entering guest and handle IL This commit adds a paranoid check when entering the guest to make sure we don't attempt running guest code in an equally or more privilged mode than the hypervisor. We also catch other accidental programming of the SPSR_EL2 which results in an illegal exception return and report this safely back to the user. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/ptrace.h | 3 +++ arch/arm64/kvm/handle_exit.c | 7 +++++++ arch/arm64/kvm/hyp/hyp-entry.S | 16 +++++++++++++++- arch/arm64/kvm/hyp/sysreg-sr.c | 19 ++++++++++++++++++- 5 files changed, 44 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 0b53c72e7591..aea01a09eb94 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -30,6 +30,7 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_EL1_SERROR 1 #define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 /* The hyp-stub will return this for any kvm_call_hyp() call */ #define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..ff35ac1258eb 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -25,6 +25,9 @@ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) +/* Additional SPSR bits not exposed in the UABI */ +#define PSR_IL_BIT (1 << 20) + /* AArch32-specific ptrace requests */ #define COMPAT_PTRACE_GETREGS 12 #define COMPAT_PTRACE_SETREGS 13 diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index e5e741bfffe1..35a81bebd02b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, */ run->exit_reason = KVM_EXIT_FAIL_ENTRY; return 0; + case ARM_EXCEPTION_IL: + /* + * We attempted an illegal exception return. Guest state must + * have been corrupted somehow. Give up. + */ + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + return -EINVAL; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 24b4fbafe3e4..b1f14f736962 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -162,6 +162,20 @@ el1_error: mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit +el2_sync: + /* Check for illegal exception return, otherwise panic */ + mrs x0, spsr_el2 + + /* if this was something else, then panic! */ + tst x0, #PSR_IL_BIT + b.eq __hyp_panic + + /* Let's attempt a recovery from the illegal exception return */ + get_vcpu_ptr x1, x0 + mov x0, #ARM_EXCEPTION_IL + b __guest_exit + + el2_error: ldp x0, x1, [sp], #16 @@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector) invalid_vect el2t_fiq_invalid // FIQ EL2t invalid_vect el2t_error_invalid // Error EL2t - invalid_vect el2h_sync_invalid // Synchronous EL2h + valid_vect el2_sync // Synchronous EL2h invalid_vect el2h_irq_invalid // IRQ EL2h invalid_vect el2h_fiq_invalid // FIQ EL2h valid_vect el2_error // Error EL2h diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..8dc285318204 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) static void __hyp_text __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) { + u64 pstate = ctxt->gp_regs.regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. + */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); - write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); + write_sysreg_el2(pstate, spsr); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -- cgit v1.2.3