From 78588335065967993618106c664fe62d2e271435 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 21 Nov 2017 13:40:17 +0100 Subject: kvm_main: Use common error handling code in kvm_dev_ioctl_create_vm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a jump target so that a bit of exception handling can be better reused at the end of this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 210bf820385a..bc092e3d1d73 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3186,21 +3186,18 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return PTR_ERR(kvm); #ifdef CONFIG_KVM_MMIO r = kvm_coalesced_mmio_init(kvm); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } + if (r < 0) + goto put_kvm; #endif r = get_unused_fd_flags(O_CLOEXEC); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } + if (r < 0) + goto put_kvm; + file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { put_unused_fd(r); - kvm_put_kvm(kvm); - return PTR_ERR(file); + r = PTR_ERR(file); + goto put_kvm; } /* @@ -3218,6 +3215,10 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) fd_install(r, file); return r; + +put_kvm: + kvm_put_kvm(kvm); + return r; } static long kvm_dev_ioctl(struct file *filp, -- cgit v1.2.3 From ec7660ccdd2b71d8c7f0243f8590253713e9b75d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:23 +0100 Subject: KVM: Take vcpu->mutex outside vcpu_load As we're about to call vcpu_load() from architecture-specific implementations of the KVM vcpu ioctls, but yet we access data structures protected by the vcpu->mutex in the generic code, factor this logic out from vcpu_load(). x86 is the only architecture which calls vcpu_load() outside of the main vcpu ioctl function, and these calls will no longer take the vcpu mutex following this patch. However, with the exception of kvm_arch_vcpu_postcreate (see below), the callers are either in the creation or destruction path of the VCPU, which means there cannot be any concurrent access to the data structure, because the file descriptor is not yet accessible, or is already gone. kvm_arch_vcpu_postcreate makes the newly created vcpu potentially accessible by other in-kernel threads through the kvm->vcpus array, and we therefore take the vcpu mutex in this case directly. 
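In outline, the locking responsibility moves out of vcpu_load() and into its callers: vcpu_load()/vcpu_put() are reduced to registering the preempt notifier and calling kvm_arch_vcpu_load()/kvm_arch_vcpu_put(), while the generic vcpu ioctl path takes vcpu->mutex explicitly. A condensed sketch of the result, pieced together from the hunks below (only the default case of the dispatch switch is shown; this is an illustration, not the full function):

	void vcpu_load(struct kvm_vcpu *vcpu)
	{
		int cpu = get_cpu();

		/* No mutex here any more: just pin the vCPU to this CPU. */
		preempt_notifier_register(&vcpu->preempt_notifier);
		kvm_arch_vcpu_load(vcpu, cpu);
		put_cpu();
	}

	static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl,
				   unsigned long arg)
	{
		struct kvm_vcpu *vcpu = filp->private_data;
		long r;

		/* Serialization against other vcpu ioctls now happens here. */
		if (mutex_lock_killable(&vcpu->mutex))
			return -EINTR;
		vcpu_load(vcpu);
		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);	/* default case of the switch */
		vcpu_put(vcpu);
		mutex_unlock(&vcpu->mutex);
		return r;
	}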
Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 +--- arch/x86/kvm/x86.c | 20 +++++++------------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 17 ++++++----------- 4 files changed, 15 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b016cf1b9f77..ef7d13e536a4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9496,10 +9496,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int r; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); vmx_switch_vmcs(vcpu, &vmx->vmcs01); free_nested(vmx); vcpu_put(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54d66f238e20..3f2c78f58570 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7767,16 +7767,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - int r; - kvm_vcpu_mtrr_init(vcpu); - r = vcpu_load(vcpu); - if (r) - return r; + vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); - return r; + return 0; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -7786,13 +7782,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) kvm_hv_vcpu_postcreate(vcpu); - if (vcpu_load(vcpu)) + if (mutex_lock_killable(&vcpu->mutex)) return; + vcpu_load(vcpu); msr.data = 0x0; msr.index = MSR_IA32_TSC; msr.host_initiated = true; kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + mutex_unlock(&vcpu->mutex); if (!kvmclock_periodic_sync) return; @@ -7803,11 +7801,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - int r; vcpu->arch.apf.msr_val = 0; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); @@ -8179,9 +8175,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - int r; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6bdd4b9f6611..09de0ff3d677 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -533,7 +533,7 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -int __must_check vcpu_load(struct kvm_vcpu *vcpu); +void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bc092e3d1d73..c4d116b336f4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -151,17 +151,12 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn) /* * Switches to specified vcpu, until a matching vcpu_put() */ -int vcpu_load(struct kvm_vcpu *vcpu) +void vcpu_load(struct kvm_vcpu *vcpu) { - int cpu; - - if (mutex_lock_killable(&vcpu->mutex)) - return -EINTR; - cpu = get_cpu(); + int cpu = get_cpu(); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); - return 0; } EXPORT_SYMBOL_GPL(vcpu_load); @@ -171,7 +166,6 @@ void vcpu_put(struct kvm_vcpu *vcpu) kvm_arch_vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); preempt_enable(); - mutex_unlock(&vcpu->mutex); } EXPORT_SYMBOL_GPL(vcpu_put); @@ -2560,9 +2554,9 @@ static long 
kvm_vcpu_ioctl(struct file *filp, #endif - r = vcpu_load(vcpu); - if (r) - return r; + if (mutex_lock_killable(&vcpu->mutex)) + return -EINTR; + vcpu_load(vcpu); switch (ioctl) { case KVM_RUN: { struct pid *oldpid; @@ -2735,6 +2729,7 @@ out_free1: } out: vcpu_put(vcpu); + mutex_unlock(&vcpu->mutex); kfree(fpu); kfree(kvm_sregs); return r; -- cgit v1.2.3 From 8a32dd60ec9488b73e04e5b7bc82b77a2580b1b7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:24 +0100 Subject: KVM: Prepare for moving vcpu_load/vcpu_put into arch specific code In preparation for moving calls to vcpu_load() and vcpu_put() into the architecture specific implementations of the KVM vcpu ioctls, move the calls in the main kvm_vcpu_ioctl() dispatcher function to each case of the ioctl select statement. This allows us to move the vcpu_load() and vcpu_put() calls into architecture specific implementations of vcpu ioctls, one by one. Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c4d116b336f4..7bbaad8717a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2556,13 +2556,13 @@ static long kvm_vcpu_ioctl(struct file *filp, if (mutex_lock_killable(&vcpu->mutex)) return -EINTR; - vcpu_load(vcpu); switch (ioctl) { case KVM_RUN: { struct pid *oldpid; r = -EINVAL; if (arg) goto out; + vcpu_load(vcpu); oldpid = rcu_access_pointer(vcpu->pid); if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ @@ -2574,6 +2574,7 @@ static long kvm_vcpu_ioctl(struct file *filp, put_pid(oldpid); } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); + vcpu_put(vcpu); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; } @@ -2584,7 +2585,9 @@ static long kvm_vcpu_ioctl(struct file *filp, kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); if (!kvm_regs) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); + vcpu_put(vcpu); if (r) goto out_free1; r = -EFAULT; @@ -2604,7 +2607,9 @@ out_free1: r = PTR_ERR(kvm_regs); goto out; } + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); + vcpu_put(vcpu); kfree(kvm_regs); break; } @@ -2613,7 +2618,9 @@ out_free1: r = -ENOMEM; if (!kvm_sregs) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); + vcpu_put(vcpu); if (r) goto out; r = -EFAULT; @@ -2629,13 +2636,17 @@ out_free1: kvm_sregs = NULL; goto out; } + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); + vcpu_put(vcpu); break; } case KVM_GET_MP_STATE: { struct kvm_mp_state mp_state; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); + vcpu_put(vcpu); if (r) goto out; r = -EFAULT; @@ -2650,7 +2661,9 @@ out_free1: r = -EFAULT; if (copy_from_user(&mp_state, argp, sizeof(mp_state))) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); + vcpu_put(vcpu); break; } case KVM_TRANSLATE: { @@ -2659,7 +2672,9 @@ out_free1: r = -EFAULT; if (copy_from_user(&tr, argp, sizeof(tr))) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); + vcpu_put(vcpu); if (r) goto out; r = -EFAULT; @@ -2674,7 +2689,9 @@ out_free1: r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof(dbg))) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); + vcpu_put(vcpu); break; 
} case KVM_SET_SIGNAL_MASK: { @@ -2705,7 +2722,9 @@ out_free1: r = -ENOMEM; if (!fpu) goto out; + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); + vcpu_put(vcpu); if (r) goto out; r = -EFAULT; @@ -2721,14 +2740,17 @@ out_free1: fpu = NULL; goto out; } + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); + vcpu_put(vcpu); break; } default: + vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); + vcpu_put(vcpu); } out: - vcpu_put(vcpu); mutex_unlock(&vcpu->mutex); kfree(fpu); kfree(kvm_sregs); -- cgit v1.2.3 From accb757d798c9b4d85cfe3e5972134c586525168 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:25 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_run Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_run(). Signed-off-by: Christoffer Dall Reviewed-by: Christian Borntraeger # s390 parts Reviewed-by: Cornelia Huck [Rebased. - Paolo] Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 3 +++ arch/powerpc/kvm/powerpc.c | 6 +++++- arch/s390/kvm/kvm-s390.c | 10 ++++++++-- arch/x86/kvm/x86.c | 3 ++- virt/kvm/arm/arm.c | 20 ++++++++++++++------ virt/kvm/kvm_main.c | 2 -- 6 files changed, 32 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 75fdeaa8c62f..ba5ecf22bb96 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -446,6 +446,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; + vcpu_load(vcpu); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { @@ -480,6 +482,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) out: kvm_sigset_deactivate(vcpu); + vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..4e2167a7ae19 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1408,6 +1408,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; + vcpu_load(vcpu); + if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) @@ -1422,7 +1424,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; - return r; + goto out; } } #endif @@ -1456,6 +1458,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +out: + vcpu_put(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ec8b68e97d3c..7972598e60d0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3373,9 +3373,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (kvm_run->immediate_exit) return -EINTR; + vcpu_load(vcpu); + if (guestdbg_exit_pending(vcpu)) { kvm_s390_prepare_debug_exit(vcpu); - return 0; + rc = 0; + goto out; } kvm_sigset_activate(vcpu); @@ -3385,7 +3388,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else if (is_vcpu_stopped(vcpu)) { pr_err_ratelimited("can't run stopped vcpu %d\n", vcpu->vcpu_id); - return -EINVAL; + rc = -EINVAL; + goto out; } sync_regs(vcpu, kvm_run); @@ -3415,6 +3419,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; +out: + vcpu_put(vcpu); return rc; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3f2c78f58570..af9da75011bc 
100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7280,8 +7280,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + vcpu_load(vcpu); kvm_sigset_activate(vcpu); - kvm_load_guest_fpu(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { @@ -7328,6 +7328,7 @@ out: post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); + vcpu_put(vcpu); return r; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..a1b2e8a43ca0 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -619,21 +619,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; + vcpu_load(vcpu); + ret = kvm_vcpu_first_run_init(vcpu); if (ret) - return ret; + goto out; if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) - return ret; - if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) - return 0; + goto out; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { + ret = 0; + goto out; + } } - if (run->immediate_exit) - return -EINTR; + if (run->immediate_exit) { + ret = -EINTR; + goto out; + } kvm_sigset_activate(vcpu); @@ -772,6 +778,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +out: + vcpu_put(vcpu); return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7bbaad8717a2..0b149827570c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2562,7 +2562,6 @@ static long kvm_vcpu_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; - vcpu_load(vcpu); oldpid = rcu_access_pointer(vcpu->pid); if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ @@ -2574,7 +2573,6 @@ static long kvm_vcpu_ioctl(struct file *filp, put_pid(oldpid); } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); - vcpu_put(vcpu); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; } -- cgit v1.2.3 From 1fc9b76b3dd2c57ca0fe42742043a5c3cbdc41c1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:26 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_regs Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_get_regs(). 
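Each of these pushdown patches has the same mechanical shape: the architecture handler brackets its existing body with vcpu_load()/vcpu_put(), and the matching pair is deleted from the generic dispatcher, so that each handler, rather than the dispatcher, decides when the vCPU is loaded. The simplest instance, condensed from the s390 hunk below:

	int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	{
		vcpu_load(vcpu);
		memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
		vcpu_put(vcpu);
		return 0;
	}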
Signed-off-by: Christoffer Dall Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 3 +++ arch/powerpc/kvm/book3s.c | 3 +++ arch/powerpc/kvm/booke.c | 3 +++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/x86/kvm/x86.c | 3 +++ virt/kvm/kvm_main.c | 2 -- 6 files changed, 14 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ba5ecf22bb96..6023b5f808c0 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1162,6 +1162,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++) regs->gpr[i] = vcpu->arch.gprs[i]; @@ -1169,6 +1171,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lo = vcpu->arch.lo; regs->pc = vcpu->arch.pc; + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 72d977e30952..d85bfd733ccd 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -497,6 +497,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); regs->ctr = kvmppc_get_ctr(vcpu); @@ -518,6 +520,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 83b485810aea..e0e4f04c5535 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1431,6 +1431,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + regs->pc = vcpu->arch.pc; regs->cr = kvmppc_get_cr(vcpu); regs->ctr = vcpu->arch.ctr; @@ -1452,6 +1454,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7972598e60d0..44317813f498 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2715,7 +2715,9 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af9da75011bc..3364c6d4f743 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7334,6 +7334,8 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { /* * We are here if userspace calls get_regs() in the middle of @@ -7367,6 +7369,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->rip = kvm_rip_read(vcpu); regs->rflags = kvm_get_rflags(vcpu); + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b149827570c..6dab2e6f8321 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2583,9 +2583,7 @@ static long kvm_vcpu_ioctl(struct file *filp, kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); if (!kvm_regs) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); - 
vcpu_put(vcpu); if (r) goto out_free1; r = -EFAULT; -- cgit v1.2.3 From 875656fe0c8473c544860d557ca1512753d6aeef Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:27 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_regs Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_set_regs(). Signed-off-by: Christoffer Dall Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 3 +++ arch/powerpc/kvm/book3s.c | 3 +++ arch/powerpc/kvm/booke.c | 3 +++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/x86/kvm/x86.c | 3 +++ virt/kvm/kvm_main.c | 2 -- 6 files changed, 14 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 6023b5f808c0..dd5f463b0e72 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1148,6 +1148,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++) vcpu->arch.gprs[i] = regs->gpr[i]; vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */ @@ -1155,6 +1157,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lo = regs->lo; vcpu->arch.pc = regs->pc; + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d85bfd733ccd..24bc7aabfc44 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -528,6 +528,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); kvmppc_set_ctr(vcpu, regs->ctr); @@ -548,6 +550,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index e0e4f04c5535..bcbbeddc3430 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1462,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + vcpu->arch.pc = regs->pc; kvmppc_set_cr(vcpu, regs->cr); vcpu->arch.ctr = regs->ctr; @@ -1483,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 44317813f498..ab08823a854b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2709,7 +2709,9 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3364c6d4f743..f82e87bb3a51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7375,6 +7375,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; vcpu->arch.emulate_regs_need_sync_to_vcpu = false; @@ -7404,6 +7406,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu 
*vcpu, struct kvm_regs *regs) kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6dab2e6f8321..e25c1a1a4120 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2603,9 +2603,7 @@ out_free1: r = PTR_ERR(kvm_regs); goto out; } - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); - vcpu_put(vcpu); kfree(kvm_regs); break; } -- cgit v1.2.3 From bcdec41cefbea525ad424050650acb0f2eed1378 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:28 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_sregs Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_get_sregs(). Signed-off-by: Christoffer Dall Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s.c | 8 +++++++- arch/powerpc/kvm/booke.c | 9 ++++++++- arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 3 +++ virt/kvm/kvm_main.c | 2 -- 5 files changed, 22 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 24bc7aabfc44..6cc2377549f7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -484,7 +484,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + int ret; + + vcpu_load(vcpu); + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + vcpu_put(vcpu); + + return ret; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index bcbbeddc3430..f647e121070e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1613,11 +1613,18 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + int ret; + + vcpu_load(vcpu); + sregs->pvr = vcpu->arch.pvr; get_sregs_base(vcpu, sregs); get_sregs_arch206(vcpu, sregs); - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ab08823a854b..87544beff507 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2734,8 +2734,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + vcpu_load(vcpu); + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + + vcpu_put(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f82e87bb3a51..66a8d8cd00f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7425,6 +7425,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, { struct desc_ptr dt; + vcpu_load(vcpu); + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -7456,6 +7458,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, set_bit(vcpu->arch.interrupt.nr, (unsigned long *)sregs->interrupt_bitmap); + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 
e25c1a1a4120..4c2f6a4d1852 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2612,9 +2612,7 @@ out_free1: r = -ENOMEM; if (!kvm_sregs) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); - vcpu_put(vcpu); if (r) goto out; r = -EFAULT; -- cgit v1.2.3 From b4ef9d4e8cb8938e6c0aa3be672b0aeeb791ecf3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:29 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_sregs Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_set_sregs(). Signed-off-by: Christoffer Dall Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s.c | 8 +++++++- arch/powerpc/kvm/booke.c | 15 ++++++++++----- arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 12 +++++++++--- virt/kvm/kvm_main.c | 2 -- 5 files changed, 30 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 6cc2377549f7..047651622cb8 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -496,7 +496,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + int ret; + + vcpu_load(vcpu); + ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + vcpu_put(vcpu); + + return ret; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f647e121070e..38939a204e26 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1630,20 +1630,25 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - int ret; + int ret = -EINVAL; + vcpu_load(vcpu); if (vcpu->arch.pvr != sregs->pvr) - return -EINVAL; + goto out; ret = set_sregs_base(vcpu, sregs); if (ret < 0) - return ret; + goto out; ret = set_sregs_arch206(vcpu, sregs); if (ret < 0) - return ret; + goto out; + + ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); - return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); +out: + vcpu_put(vcpu); + return ret; } int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 87544beff507..d27a1dd0986b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2726,8 +2726,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + vcpu_load(vcpu); + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + + vcpu_put(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 66a8d8cd00f6..f01a7d73dad7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7525,15 +7525,18 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int mmu_reset_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; + int ret = -EINVAL; + + vcpu_load(vcpu); if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; + goto out; apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) - return -EINVAL; + goto out; dt.size = 
sregs->idt.limit; dt.address = sregs->idt.base; @@ -7599,7 +7602,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_EVENT, vcpu); - return 0; + ret = 0; +out: + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4c2f6a4d1852..e04a216bcd14 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2628,9 +2628,7 @@ out_free1: kvm_sregs = NULL; goto out; } - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); - vcpu_put(vcpu); break; } case KVM_GET_MP_STATE: { -- cgit v1.2.3 From fd2325612c1493c85cce89ea16b2396baca83311 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:30 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_mpstate Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_get_mpstate(). Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 11 +++++++++-- arch/x86/kvm/x86.c | 3 +++ virt/kvm/arm/arm.c | 3 +++ virt/kvm/kvm_main.c | 2 -- 4 files changed, 15 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d27a1dd0986b..4297f5094a88 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2833,9 +2833,16 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret; + + vcpu_load(vcpu); + /* CHECK_STOP and LOAD are not supported yet */ - return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : - KVM_MP_STATE_OPERATING; + ret = is_vcpu_stopped(vcpu) ? 
KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; + + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f01a7d73dad7..80791939065f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7465,6 +7465,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + vcpu_load(vcpu); + kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && vcpu->arch.pv.pv_unhalted) @@ -7472,6 +7474,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a1b2e8a43ca0..65d50100ba3c 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -381,11 +381,14 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + vcpu_load(vcpu); + if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e04a216bcd14..8e2f582417c3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2634,9 +2634,7 @@ out_free1: case KVM_GET_MP_STATE: { struct kvm_mp_state mp_state; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); - vcpu_put(vcpu); if (r) goto out; r = -EFAULT; -- cgit v1.2.3 From e83dff5edf0c3f014e4b4ac5e1c86dbe797687c7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:31 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_mpstate Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_set_mpstate(). 
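Handlers that used to fail with an early "return -EINVAL" need slightly more care: the error is collected in a local variable so that vcpu_put() runs on every exit path. The arm implementation from the hunk below shows the shape:

	int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
					    struct kvm_mp_state *mp_state)
	{
		int ret = 0;

		vcpu_load(vcpu);

		switch (mp_state->mp_state) {
		case KVM_MP_STATE_RUNNABLE:
			vcpu->arch.power_off = false;
			break;
		case KVM_MP_STATE_STOPPED:
			vcpu_power_off(vcpu);
			break;
		default:
			ret = -EINVAL;
		}

		vcpu_put(vcpu);
		return ret;
	}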
Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 3 +++ arch/x86/kvm/x86.c | 14 +++++++++++--- virt/kvm/arm/arm.c | 9 +++++++-- virt/kvm/kvm_main.c | 2 -- 4 files changed, 21 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4297f5094a88..4e6304d45ca5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2850,6 +2850,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int rc = 0; + vcpu_load(vcpu); + /* user space knows about this interface - let it control the state */ vcpu->kvm->arch.user_cpu_state_ctrl = 1; @@ -2867,6 +2869,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, rc = -ENXIO; } + vcpu_put(vcpu); return rc; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 80791939065f..a9e6cca1f46e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7481,15 +7481,19 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret = -EINVAL; + + vcpu_load(vcpu); + if (!lapic_in_kernel(vcpu) && mp_state->mp_state != KVM_MP_STATE_RUNNABLE) - return -EINVAL; + goto out; /* INITs are latched while in SMM */ if ((is_smm(vcpu) || vcpu->arch.smi_pending) && (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) - return -EINVAL; + goto out; if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; @@ -7497,7 +7501,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, } else vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); - return 0; + + ret = 0; +out: + vcpu_put(vcpu); + return ret; } int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 65d50100ba3c..aa6167086211 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -395,6 +395,10 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret = 0; + + vcpu_load(vcpu); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -403,10 +407,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_power_off(vcpu); break; default: - return -EINVAL; + ret = -EINVAL; } - return 0; + vcpu_put(vcpu); + return ret; } /** diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8e2f582417c3..4b5aeb4b8c58 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2649,9 +2649,7 @@ out_free1: r = -EFAULT; if (copy_from_user(&mp_state, argp, sizeof(mp_state))) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); - vcpu_put(vcpu); break; } case KVM_TRANSLATE: { -- cgit v1.2.3 From 1da5b61dac98360a7e50b1565f6d499c6fc8123a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:32 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_translate Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_translate(). 
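None of this is visible from userspace; the vcpu ioctls keep their semantics, and only the point at which the kernel takes vcpu->mutex and loads the vCPU changes. For reference, a minimal, hypothetical userspace fragment exercising KVM_TRANSLATE (translate_gva, vcpu_fd and the guest virtual address are illustrative names only; the file descriptor comes from KVM_CREATE_VCPU):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	/* Ask KVM to translate a guest virtual address on the given vCPU fd. */
	static int translate_gva(int vcpu_fd, unsigned long long gva)
	{
		struct kvm_translation tr = { .linear_address = gva };

		if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) < 0)
			return -1;
		printf("gva %#llx -> gpa %#llx (valid=%u, writeable=%u)\n",
		       gva, (unsigned long long)tr.physical_address,
		       tr.valid, tr.writeable);
		return 0;
	}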
Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/booke.c | 2 ++ arch/x86/kvm/x86.c | 3 +++ virt/kvm/kvm_main.c | 2 -- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 38939a204e26..b5f46517c839 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1791,7 +1791,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { int r; + vcpu_load(vcpu); r = kvmppc_core_vcpu_translate(vcpu, tr); + vcpu_put(vcpu); return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9e6cca1f46e..080e00dff426 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7684,6 +7684,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; int idx; + vcpu_load(vcpu); + idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -7692,6 +7694,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, tr->writeable = 1; tr->usermode = 0; + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b5aeb4b8c58..37217ec647b4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2658,9 +2658,7 @@ out_free1: r = -EFAULT; if (copy_from_user(&tr, argp, sizeof(tr))) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); - vcpu_put(vcpu); if (r) goto out; r = -EFAULT; -- cgit v1.2.3 From 66b5656222990f1a536f5900ccd98539f9cf231f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:33 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_guest_debug Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_set_guest_debug(). 
Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/guest.c | 15 ++++++++++++--- arch/powerpc/kvm/book3s.c | 2 ++ arch/powerpc/kvm/booke.c | 19 +++++++++++++------ arch/s390/kvm/kvm-s390.c | 16 ++++++++++++---- arch/x86/kvm/x86.c | 4 +++- virt/kvm/kvm_main.c | 2 -- 6 files changed, 42 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5c7f657dd207..d7e3299a7734 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -361,10 +361,16 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { + int ret = 0; + + vcpu_load(vcpu); + trace_kvm_set_guest_debug(vcpu, dbg->control); - if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) - return -EINVAL; + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { + ret = -EINVAL; + goto out; + } if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; @@ -378,7 +384,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, /* If not enabled clear all flags */ vcpu->guest_debug = 0; } - return 0; + +out: + vcpu_put(vcpu); + return ret; } int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 047651622cb8..234531d1bee1 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -755,7 +755,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { + vcpu_load(vcpu); vcpu->guest_debug = dbg->control; + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b5f46517c839..6038e2e7aee0 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -2016,12 +2016,15 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { struct debug_reg *dbg_reg; int n, b = 0, w = 0; + int ret = 0; + + vcpu_load(vcpu); if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); - return 0; + goto out; } kvm_guest_protect_msr(vcpu, MSR_DE, true); @@ -2053,8 +2056,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, #endif if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - return 0; + goto out; + ret = -EINVAL; for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { uint64_t addr = dbg->arch.bp[n].addr; uint32_t type = dbg->arch.bp[n].type; @@ -2065,21 +2069,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (type & ~(KVMPPC_DEBUG_WATCH_READ | KVMPPC_DEBUG_WATCH_WRITE | KVMPPC_DEBUG_BREAKPOINT)) - return -EINVAL; + goto out; if (type & KVMPPC_DEBUG_BREAKPOINT) { /* Setting H/W breakpoint */ if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) - return -EINVAL; + goto out; } else { /* Setting H/W watchpoint */ if (kvmppc_booke_add_watchpoint(dbg_reg, addr, type, w++)) - return -EINVAL; + goto out; } } - return 0; + ret = 0; +out: + vcpu_put(vcpu); + return ret; } void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4e6304d45ca5..1c82dda07bc5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2801,13 +2801,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { int rc = 0; + vcpu_load(vcpu); + vcpu->guest_debug = 0; 
kvm_s390_clear_bp_data(vcpu); - if (dbg->control & ~VALID_GUESTDBG_FLAGS) - return -EINVAL; - if (!sclp.has_gpere) - return -EINVAL; + if (dbg->control & ~VALID_GUESTDBG_FLAGS) { + rc = -EINVAL; + goto out; + } + if (!sclp.has_gpere) { + rc = -EINVAL; + goto out; + } if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; @@ -2827,6 +2833,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); } +out: + vcpu_put(vcpu); return rc; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 080e00dff426..9ef0b9299e93 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7625,6 +7625,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, unsigned long rflags; int i, r; + vcpu_load(vcpu); + if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { r = -EBUSY; if (vcpu->arch.exception.pending) @@ -7670,7 +7672,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, r = 0; out: - + vcpu_put(vcpu); return r; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 37217ec647b4..b44762fcaf84 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2673,9 +2673,7 @@ out_free1: r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof(dbg))) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); - vcpu_put(vcpu); break; } case KVM_SET_SIGNAL_MASK: { -- cgit v1.2.3 From 1393123e1e24aba96413d351b9546086ea07504d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:34 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_fpu Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_get_fpu(). Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 7 +++++-- virt/kvm/kvm_main.c | 2 -- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1c82dda07bc5..39327888bb06 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2762,6 +2762,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + vcpu_load(vcpu); + /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) @@ -2770,6 +2772,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) else memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = vcpu->run->s.regs.fpc; + + vcpu_put(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ef0b9299e93..846d292ea33b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7702,9 +7702,11 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxregs_state *fxsave = - &vcpu->arch.guest_fpu.state.fxsave; + struct fxregs_state *fxsave; + vcpu_load(vcpu); + + fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; fpu->fsw = fxsave->swd; @@ -7714,6 +7716,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fpu->last_dp = fxsave->rdp; memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c 
index b44762fcaf84..5791aa687233 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2704,9 +2704,7 @@ out_free1: r = -ENOMEM; if (!fpu) goto out; - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); - vcpu_put(vcpu); if (r) goto out; r = -EFAULT; -- cgit v1.2.3 From 6a96bc7fa0cdd96bac2b8298d708a94f8de6f6d4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:35 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_fpu Move vcpu_load() and vcpu_put() into the architecture specific implementations of kvm_arch_vcpu_ioctl_set_fpu(). Reviewed-by: David Hildenbrand Signed-off-by: Christoffer Dall Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 15 ++++++++++++--- arch/x86/kvm/x86.c | 8 ++++++-- virt/kvm/kvm_main.c | 2 -- 3 files changed, 18 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 39327888bb06..3bd2f83e1fa9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2749,15 +2749,24 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - if (test_fp_ctl(fpu->fpc)) - return -EINVAL; + int ret = 0; + + vcpu_load(vcpu); + + if (test_fp_ctl(fpu->fpc)) { + ret = -EINVAL; + goto out; + } vcpu->run->s.regs.fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, (freg_t *) fpu->fprs); else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - return 0; + +out: + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 846d292ea33b..981b61531ab7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7722,8 +7722,11 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxregs_state *fxsave = - &vcpu->arch.guest_fpu.state.fxsave; + struct fxregs_state *fxsave; + + vcpu_load(vcpu); + + fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -7734,6 +7737,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fxsave->rdp = fpu->last_dp; memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); + vcpu_put(vcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5791aa687233..ca0ec9fb72ce 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2720,9 +2720,7 @@ out_free1: fpu = NULL; goto out; } - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); - vcpu_put(vcpu); break; } default: -- cgit v1.2.3 From 9b062471e52a1692c5563ba1535c84d708e2ff6f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Dec 2017 21:35:36 +0100 Subject: KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl Move the calls to vcpu_load() and vcpu_put() in to the architecture specific implementations of kvm_arch_vcpu_ioctl() which dispatches further architecture-specific ioctls on to other functions. Some architectures support asynchronous vcpu ioctls which cannot call vcpu_load() or take the vcpu->mutex, because that would prevent concurrent execution with a running VCPU, which is the intended purpose of these ioctls, for example because they inject interrupts. 
We repeat the separate checks for these specifics in the architecture code for MIPS, S390 and PPC, and avoid taking the vcpu->mutex and calling vcpu_load for these ioctls. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 49 +++++++++++++++++++++++---------------- arch/powerpc/kvm/powerpc.c | 13 ++++++----- arch/s390/kvm/kvm-s390.c | 19 ++++++++------- arch/x86/kvm/x86.c | 22 +++++++++++++----- virt/kvm/arm/arm.c | 58 ++++++++++++++++++++++++++++++++-------------- virt/kvm/kvm_main.c | 2 -- 6 files changed, 103 insertions(+), 60 deletions(-) (limited to 'virt') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index dd5f463b0e72..9200b3def440 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -910,56 +910,65 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, void __user *argp = (void __user *)arg; long r; + if (ioctl == KVM_INTERRUPT) { + struct kvm_mips_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, + irq.irq); + + return kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + + vcpu_load(vcpu); + switch (ioctl) { case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + r = -EFAULT; if (copy_from_user(®, argp, sizeof(reg))) - return -EFAULT; + break; if (ioctl == KVM_SET_ONE_REG) - return kvm_mips_set_reg(vcpu, ®); + r = kvm_mips_set_reg(vcpu, ®); else - return kvm_mips_get_reg(vcpu, ®); + r = kvm_mips_get_reg(vcpu, ®); + break; } case KVM_GET_REG_LIST: { struct kvm_reg_list __user *user_list = argp; struct kvm_reg_list reg_list; unsigned n; + r = -EFAULT; if (copy_from_user(®_list, user_list, sizeof(reg_list))) - return -EFAULT; + break; n = reg_list.n; reg_list.n = kvm_mips_num_regs(vcpu); if (copy_to_user(user_list, ®_list, sizeof(reg_list))) - return -EFAULT; + break; + r = -E2BIG; if (n < reg_list.n) - return -E2BIG; - return kvm_mips_copy_reg_indices(vcpu, user_list->reg); - } - case KVM_INTERRUPT: - { - struct kvm_mips_interrupt irq; - - if (copy_from_user(&irq, argp, sizeof(irq))) - return -EFAULT; - kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, - irq.irq); - - r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; - } + r = kvm_mips_copy_reg_indices(vcpu, user_list->reg); + break; + } case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; + r = -EFAULT; if (copy_from_user(&cap, argp, sizeof(cap))) - return -EFAULT; + break; r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } default: r = -ENOIOCTLCMD; } + + vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4e2167a7ae19..ba8134a989c1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1614,16 +1614,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void __user *argp = (void __user *)arg; long r; - switch (ioctl) { - case KVM_INTERRUPT: { + if (ioctl == KVM_INTERRUPT) { struct kvm_interrupt irq; - r = -EFAULT; if (copy_from_user(&irq, argp, sizeof(irq))) - goto out; - r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); - goto out; + return -EFAULT; + return kvm_vcpu_ioctl_interrupt(vcpu, &irq); } + vcpu_load(vcpu); + + switch (ioctl) { case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; @@ -1663,6 +1663,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: + vcpu_put(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3bd2f83e1fa9..9700d71cb691 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3737,24 +3737,25 @@ long 
kvm_arch_vcpu_ioctl(struct file *filp, case KVM_S390_IRQ: { struct kvm_s390_irq s390irq; - r = -EFAULT; if (copy_from_user(&s390irq, argp, sizeof(s390irq))) - break; - r = kvm_s390_inject_vcpu(vcpu, &s390irq); - break; + return -EFAULT; + return kvm_s390_inject_vcpu(vcpu, &s390irq); } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; struct kvm_s390_irq s390irq; - r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) - break; + return -EFAULT; if (s390int_to_s390irq(&s390int, &s390irq)) return -EINVAL; - r = kvm_s390_inject_vcpu(vcpu, &s390irq); - break; + return kvm_s390_inject_vcpu(vcpu, &s390irq); } + } + + vcpu_load(vcpu); + + switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_s390_vcpu_store_status(vcpu, arg); @@ -3879,6 +3880,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, default: r = -ENOTTY; } + + vcpu_put(vcpu); return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 981b61531ab7..fafaef072f1b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3484,6 +3484,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void *buffer; } u; + vcpu_load(vcpu); + u.buffer = NULL; switch (ioctl) { case KVM_GET_LAPIC: { @@ -3509,8 +3511,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!lapic_in_kernel(vcpu)) goto out; u.lapic = memdup_user(argp, sizeof(*u.lapic)); - if (IS_ERR(u.lapic)) - return PTR_ERR(u.lapic); + if (IS_ERR(u.lapic)) { + r = PTR_ERR(u.lapic); + goto out_nofree; + } r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); break; @@ -3684,8 +3688,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_XSAVE: { u.xsave = memdup_user(argp, sizeof(*u.xsave)); - if (IS_ERR(u.xsave)) - return PTR_ERR(u.xsave); + if (IS_ERR(u.xsave)) { + r = PTR_ERR(u.xsave); + goto out_nofree; + } r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); break; @@ -3707,8 +3713,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_XCRS: { u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); - if (IS_ERR(u.xcrs)) - return PTR_ERR(u.xcrs); + if (IS_ERR(u.xcrs)) { + r = PTR_ERR(u.xcrs); + goto out_nofree; + } r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; @@ -3752,6 +3760,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: kfree(u.buffer); +out_nofree: + vcpu_put(vcpu); return r; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index aa6167086211..cd7d90c9f644 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1003,66 +1003,88 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; struct kvm_device_attr attr; + long r; + + vcpu_load(vcpu); switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; + r = -EFAULT; if (copy_from_user(&init, argp, sizeof(init))) - return -EFAULT; + break; - return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + break; } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + r = -ENOEXEC; if (unlikely(!kvm_vcpu_initialized(vcpu))) - return -ENOEXEC; + break; + r = -EFAULT; if (copy_from_user(®, argp, sizeof(reg))) - return -EFAULT; + break; + if (ioctl == KVM_SET_ONE_REG) - return kvm_arm_set_reg(vcpu, ®); + r = kvm_arm_set_reg(vcpu, ®); else - return kvm_arm_get_reg(vcpu, ®); + r = kvm_arm_get_reg(vcpu, ®); + break; } case KVM_GET_REG_LIST: { struct kvm_reg_list __user *user_list = argp; struct kvm_reg_list reg_list; unsigned n; + r = -ENOEXEC; if (unlikely(!kvm_vcpu_initialized(vcpu))) - return -ENOEXEC; + break; + r 
= -EFAULT; if (copy_from_user(®_list, user_list, sizeof(reg_list))) - return -EFAULT; + break; n = reg_list.n; reg_list.n = kvm_arm_num_regs(vcpu); if (copy_to_user(user_list, ®_list, sizeof(reg_list))) - return -EFAULT; + break; + r = -E2BIG; if (n < reg_list.n) - return -E2BIG; - return kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; + r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; } case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_set_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_set_attr(vcpu, &attr); + break; } case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_get_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_get_attr(vcpu, &attr); + break; } case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_has_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_has_attr(vcpu, &attr); + break; } default: - return -EINVAL; + r = -EINVAL; } + + vcpu_put(vcpu); + return r; } /** diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ca0ec9fb72ce..19c184fa1839 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2724,9 +2724,7 @@ out_free1: break; } default: - vcpu_load(vcpu); r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); - vcpu_put(vcpu); } out: mutex_unlock(&vcpu->mutex); -- cgit v1.2.3 From 5cb0944c0c66004c0d9006a7f0fba5782ae38f69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Dec 2017 17:41:34 +0100 Subject: KVM: introduce kvm_arch_vcpu_async_ioctl After the vcpu_load/vcpu_put pushdown, the handling of asynchronous VCPU ioctl is already much clearer in that it is obvious that they bypass vcpu_load and vcpu_put. However, it is still not perfect in that the different state of the VCPU mutex is still hidden in the caller. Separate those ioctls into a new function kvm_arch_vcpu_async_ioctl that returns -ENOIOCTLCMD for more "traditional" synchronous ioctls. 
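The generic dispatcher then calls this hook first, before taking vcpu->mutex, and only falls through to the normal locked path when the hook returns -ENOIOCTLCMD; architectures that do not select HAVE_KVM_VCPU_ASYNC_IOCTL get a static inline stub that always returns -ENOIOCTLCMD. The powerpc implementation from the diff below shows the intended shape of the hook:

	long kvm_arch_vcpu_async_ioctl(struct file *filp,
				       unsigned int ioctl, unsigned long arg)
	{
		struct kvm_vcpu *vcpu = filp->private_data;
		void __user *argp = (void __user *)arg;

		if (ioctl == KVM_INTERRUPT) {
			struct kvm_interrupt irq;

			/* Interrupt injection must not wait on a running vCPU. */
			if (copy_from_user(&irq, argp, sizeof(irq)))
				return -EFAULT;
			return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
		}
		return -ENOIOCTLCMD;
	}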
Cc: James Hogan Cc: Paul Mackerras Cc: Christian Borntraeger Reviewed-by: Christoffer Dall Reviewed-by: Cornelia Huck Suggested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/mips/kvm/Kconfig | 1 + arch/mips/kvm/mips.c | 15 ++++++++++++--- arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/powerpc.c | 14 +++++++++++--- arch/s390/kvm/Kconfig | 1 + arch/s390/kvm/kvm-s390.c | 16 ++++++++++++---- include/linux/kvm_host.h | 12 ++++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 12 +++++------- 9 files changed, 58 insertions(+), 17 deletions(-) (limited to 'virt') diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index b17447ce8873..76b93a9c8c9b 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO select MMU_NOTIFIER select SRCU diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 9200b3def440..2549fdd27ee1 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -903,12 +903,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, - unsigned long arg) +long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - long r; if (ioctl == KVM_INTERRUPT) { struct kvm_mips_interrupt irq; @@ -921,6 +920,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, return kvm_vcpu_ioctl_interrupt(vcpu, &irq); } + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + vcpu_load(vcpu); switch (ioctl) { diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index b12b8eb39c29..f884a0529dfe 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_EVENTFD + select HAVE_KVM_VCPU_ASYNC_IOCTL select SRCU select KVM_VFIO select IRQ_BYPASS_MANAGER diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ba8134a989c1..66a310779de5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1607,12 +1607,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; } -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - long r; if (ioctl == KVM_INTERRUPT) { struct kvm_interrupt irq; @@ -1620,6 +1619,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return -EFAULT; return kvm_vcpu_ioctl_interrupt(vcpu, &irq); } + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; vcpu_load(vcpu); diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 9a4594e0a1ff..a3dbd459cce9 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD select 
KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9700d71cb691..40f0ae5a883f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3725,13 +3725,11 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, return r; } -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - int idx; - long r; switch (ioctl) { case KVM_S390_IRQ: { @@ -3752,6 +3750,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return kvm_s390_inject_vcpu(vcpu, &s390irq); } } + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + int idx; + long r; vcpu_load(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 09de0ff3d677..ac0062b74aed 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1260,4 +1260,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ +#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); +#else +static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, + unsigned long arg) +{ + return -ENOIOCTLCMD; +} +#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ + #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 70691c08e1ed..cca7e065a075 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -51,3 +51,6 @@ config KVM_COMPAT config HAVE_KVM_IRQ_BYPASS bool + +config HAVE_KVM_VCPU_ASYNC_IOCTL + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 19c184fa1839..b4414842b023 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2544,15 +2544,13 @@ static long kvm_vcpu_ioctl(struct file *filp, if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL; -#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* - * Special cases: vcpu ioctls that are asynchronous to vcpu execution, - * so vcpu_load() would break it. + * Some architectures have vcpu ioctls that are asynchronous to vcpu + * execution; mutex_lock() would break them. 
*/ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) - return kvm_arch_vcpu_ioctl(filp, ioctl, arg); -#endif - + r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg); + if (r != -ENOIOCTLCMD) + return r; if (mutex_lock_killable(&vcpu->mutex)) return -EINTR; -- cgit v1.2.3 From 4404b336cf32bf709942067fe71d8b5cf70fb2b2 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Tue, 28 Nov 2017 23:48:17 +0100 Subject: KVM: arm: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: virt/kvm/arm/vgic/vgic-its.c:971:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8e633bd9cc1e..465095355666 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, device = vgic_its_alloc_device(its, device_id, itt_addr, num_eventid_bits); - if (IS_ERR(device)) - return PTR_ERR(device); - return 0; + return PTR_ERR_OR_ZERO(device); } /* -- cgit v1.2.3 From 5a24575032971c5a9a4580417a791c427ebdb8e5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 8 Sep 2017 07:07:13 -0700 Subject: KVM: arm/arm64: Remove redundant preemptible checks The __this_cpu_read() and __this_cpu_write() functions already implement checks for the required preemption levels when using CONFIG_DEBUG_PREEMPT which gives you nice error messages and such. Therefore there is no need to explicitly check this using a BUG_ON() in the code (which we don't do for other uses of per cpu variables either). Acked-by: Marc Zyngier Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..3610e132df8b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -71,7 +71,6 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { - BUG_ON(preemptible()); __this_cpu_write(kvm_arm_running_vcpu, vcpu); } @@ -81,7 +80,6 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) */ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) { - BUG_ON(preemptible()); return __this_cpu_read(kvm_arm_running_vcpu); } -- cgit v1.2.3 From 6c1b7521f4a07cc63bbe2dfe290efed47cdb780a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 14 Sep 2017 11:08:45 -0700 Subject: KVM: arm/arm64: Factor out functionality to get vgic mmio requester_vcpu We are about to distinguish between userspace accesses and mmio traps for a number of the mmio handlers. When the requester vcpu is NULL, it means we are handling a userspace access. Factor out the functionality to get the request vcpu into its own function, mostly so we have a common place to document the semantics of the return value. Also take the chance to move the functionality outside of holding a spinlock and instead explicitly disable and enable preemption. This supports PREEMPT_RT kernels as well. 
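The helper introduced below simply reads the running-VCPU per-CPU variable inside a preempt-disabled section; callers then treat a NULL result as a userspace access. A minimal usage sketch follows (the handler name is made up purely for illustration):

static void example_vgic_mmio_handler(struct kvm_vcpu *vcpu)
{
	/* NULL means no VCPU is running: this is a userspace access. */
	struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
	bool is_uaccess = !requester_vcpu;

	if (is_uaccess) {
		/* Guest is stopped; VGIC state can be modified directly. */
	} else {
		/* MMIO trap from the guest running on this CPU. */
	}
}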
Acked-by: Marc Zyngier Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-mmio.c | 44 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index deb51ee16a3d..fdad95f62fa3 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -122,6 +122,27 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +/* + * This function will return the VCPU that performed the MMIO access and + * trapped from within the VM, and will return NULL if this is a userspace + * access. + * + * We can disable preemption locally around accessing the per-CPU variable, + * and use the resolved vcpu pointer after enabling preemption again, because + * even if the current thread is migrated to another CPU, reading the per-CPU + * value later will give us the same value as we update the per-CPU variable + * in the preempt notifier handlers. + */ +static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void) +{ + struct kvm_vcpu *vcpu; + + preempt_disable(); + vcpu = kvm_arm_get_running_vcpu(); + preempt_enable(); + return vcpu; +} + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -184,24 +205,10 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { - struct kvm_vcpu *requester_vcpu; unsigned long flags; - spin_lock_irqsave(&irq->irq_lock, flags); + struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); - /* - * The vcpu parameter here can mean multiple things depending on how - * this function is called; when handling a trap from the kernel it - * depends on the GIC version, and these functions are also called as - * part of save/restore from userspace. - * - * Therefore, we have to figure out the requester in a reliable way. - * - * When accessing VGIC state from user space, the requester_vcpu is - * NULL, which is fine, because we guarantee that no VCPUs are running - * when accessing VGIC state from user space so irq->vcpu->cpu is - * always -1. - */ - requester_vcpu = kvm_arm_get_running_vcpu(); + spin_lock_irqsave(&irq->irq_lock, flags); /* * If this virtual IRQ was written into a list register, we @@ -213,6 +220,11 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * vgic_change_active_prepare) and still has to sync back this IRQ, * so we release and re-acquire the spin_lock to let the other thread * sync back the IRQ. + * + * When accessing VGIC state from user space, requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ -- cgit v1.2.3 From 70450a9fbe0658e864f882d4351e5bae018b2647 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 4 Sep 2017 11:56:37 +0200 Subject: KVM: arm/arm64: Don't cache the timer IRQ level The timer logic was designed after a strict idea of modeling an interrupt line level in software, meaning that only transitions in the level need to be reported to the VGIC. This works well for the timer, because the arch timer code is in complete control of the device and can track the transitions of the line. 
However, as we are about to support using the HW bit in the VGIC not just for the timer, but also for VFIO which cannot track transitions of the interrupt line, we have to decide on an interface between the GIC and other subsystems for level triggered mapped interrupts, which both the timer and VFIO can use. VFIO only sees an asserting transition of the physical interrupt line, and tells the VGIC when that happens. That means that part of the interrupt flow is offloaded to the hardware. To use the same interface for VFIO devices and the timer, we therefore have to change the timer (we cannot change VFIO because it doesn't know the details of the device it is assigning to a VM). Luckily, changing the timer is simple, we just need to stop 'caching' the line level, but instead let the VGIC know the state of the timer every time there is a potential change in the line level, and when the line level should be asserted from the timer ISR. The VGIC can ignore extra notifications using its validate mechanism. Reviewed-by: Marc Zyngier Reviewed-by: Andre Przywara Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f9555b1e7f15..9376fe03bf2e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -99,11 +99,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) } vtimer = vcpu_vtimer(vcpu); - if (!vtimer->irq.level) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - if (kvm_timer_irq_can_fire(vtimer)) - kvm_timer_update_irq(vcpu, true, vtimer); - } + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + if (kvm_timer_irq_can_fire(vtimer)) + kvm_timer_update_irq(vcpu, true, vtimer); if (unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_vtimer_update_mask_user(vcpu); @@ -324,12 +322,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + bool level; if (unlikely(!timer->enabled)) return; - if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) - kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); + /* + * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part + * of its lifecycle is offloaded to the hardware, and we therefore may + * not have lowered the irq.level value before having to signal a new + * interrupt, but have to signal an interrupt every time the level is + * asserted. + */ + level = kvm_timer_should_fire(vtimer); + kvm_timer_update_irq(vcpu, level, vtimer); if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); -- cgit v1.2.3 From e40cc57bac792713ff6a1b80a1f67b54c05e5e21 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 29 Aug 2017 10:40:44 +0200 Subject: KVM: arm/arm64: vgic: Support level-triggered mapped interrupts Level-triggered mapped IRQs are special because we only observe rising edges as input to the VGIC, and we don't set the EOI flag and therefore are not told when the level goes down, so that we can re-queue a new interrupt when the level goes up. 
One way to solve this problem is to side-step the logic of the VGIC and special-case the validation in the injection path, but it has the unfortunate drawback of having to peek into the physical GIC state whenever we want to know if the interrupt is pending on the virtual distributor. Instead, we can maintain the current semantics of a level-triggered interrupt by sort of treating it as an edge-triggered interrupt, following from the fact that we only observe an asserting edge. This requires us to be a bit careful when populating the LRs and when folding the state back in though: * We lower the line level when populating the LR, so that when subsequently observing an asserting edge, the VGIC will do the right thing. * If the guest never acked the interrupt while running (for example if it had masked interrupts at the CPU level while running), we have to preserve the pending state of the LR and move it back to the line_level field of the struct irq when folding LR state. If the guest never acked the interrupt while running, but changed the device state and lowered the line (again with interrupts masked) then we need to observe this change in the line_level. Both of the above situations are solved by sampling the physical line and setting the line level when folding the LR back. * Finally, if the guest never acked the interrupt while running and sampling the line reveals that the device state has changed and the line has been lowered, we must clear the physical active state, since we will otherwise never be told when the interrupt becomes asserted again. This has the added benefit of making the timer optimization patches (https://lists.cs.columbia.edu/pipermail/kvmarm/2017-July/026343.html) a bit simpler, because the timer code doesn't have to clear the active state on the sync anymore. It also potentially improves the performance of the timer implementation because the GIC knows the state of the LR and only needs to clear the active state when the pending bit in the LR is still set, whereas the timer has to always clear it when returning from running the guest with an injected timer interrupt. Reviewed-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v2.c | 29 +++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 29 +++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic.c | 23 +++++++++++++++++++++++ virt/kvm/arm/vgic/vgic.h | 7 +++++++ 4 files changed, 88 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 80897102da26..c32d7b93ffd1 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again.
+ */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= GICH_LR_EOI; } + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v2_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) + irq->line_level = false; + /* The GICv2 LR only holds five bits of priority. */ val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index f47e8481fa45..6b329414e57a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -145,6 +165,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= ICH_LR_EOI; } + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + /* * We currently only support Group1 interrupts, which is a * known defect. This needs to be addressed at some point. 
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index ecb8e25f5fe5..4318e9edd075 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -144,6 +144,29 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) kfree(irq); } +/* Get the input level of a mapped IRQ directly from the physical GIC */ +bool vgic_get_phys_line_level(struct vgic_irq *irq) +{ + bool line_level; + + BUG_ON(!irq->hw); + + WARN_ON(irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &line_level)); + return line_level; +} + +/* Set/Clear the physical active state */ +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) +{ + + BUG_ON(!irq->hw); + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_ACTIVE, + active)); +} + /** * kvm_vgic_target_oracle - compute the target vcpu for an irq * diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index efbcf8f96f9c..d0787983a357 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq) return irq->pending_latch || irq->line_level; } +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) +{ + return irq->config == VGIC_CONFIG_LEVEL && irq->hw; +} + /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -140,6 +145,8 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); -- cgit v1.2.3 From b6909a659f8d5de2df36cabb1c0505d262996a24 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 27 Oct 2017 19:30:09 +0200 Subject: KVM: arm/arm64: Support a vgic interrupt line level sample function The GIC sometimes need to sample the physical line of a mapped interrupt. As we know this to be notoriously slow, provide a callback function for devices (such as the timer) which can do this much faster than talking to the distributor, for example by comparing a few in-memory values. Fall back to the good old method of poking the physical GIC if no callback is provided. Reviewed-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 13 ++++++++++++- virt/kvm/arm/arch_timer.c | 3 ++- virt/kvm/arm/vgic/vgic.c | 13 +++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8c896540a72c..cdbd142ca7f2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -130,6 +130,17 @@ struct vgic_irq { u8 priority; enum vgic_irq_config config; /* Level or edge */ + /* + * Callback function pointer to in-kernel devices that can tell us the + * state of the input level of mapped level-triggered IRQ faster than + * peaking into the physical GIC. + * + * Always called in non-preemptible section and the functions can use + * kvm_arm_get_running_vcpu() to get the vcpu pointer for private + * IRQs. + */ + bool (*get_input_level)(int vintid); + void *owner; /* Opaque pointer to reserve an interrupt for in-kernel devices. 
*/ }; @@ -331,7 +342,7 @@ void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid); + u32 vintid, bool (*get_input_level)(int vindid)); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 9376fe03bf2e..fb0533ed903d 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -834,7 +834,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); + ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, + NULL); if (ret) return ret; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 4318e9edd075..d57b3bf8eb36 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -144,13 +144,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) kfree(irq); } -/* Get the input level of a mapped IRQ directly from the physical GIC */ bool vgic_get_phys_line_level(struct vgic_irq *irq) { bool line_level; BUG_ON(!irq->hw); + if (irq->get_input_level) + return irq->get_input_level(irq->intid); + WARN_ON(irq_get_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, &line_level)); @@ -436,7 +438,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - unsigned int host_irq) + unsigned int host_irq, + bool (*get_input_level)(int vindid)) { struct irq_desc *desc; struct irq_data *data; @@ -456,6 +459,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; + irq->get_input_level = get_input_level; return 0; } @@ -464,10 +468,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; + irq->get_input_level = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid) + u32 vintid, bool (*get_input_level)(int vindid)) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -476,7 +481,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); -- cgit v1.2.3 From df635c5b184da05145db1d63e1dcb1c853f425c2 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 1 Sep 2017 16:25:12 +0200 Subject: KVM: arm/arm64: Support VGIC dist pend/active changes for mapped IRQs For mapped IRQs (with the HW bit set in the LR) we have to follow some rules of the architecture. One of these rules is that VM must not be allowed to deactivate a virtual interrupt with the HW bit set unless the physical interrupt is also active. This works fine when injecting mapped interrupts, because we leave it up to the injector to either set EOImode==1 or manually set the active state of the physical interrupt. 
However, the guest can set virtual interrupt to be pending or active by writing to the virtual distributor, which could lead to deactivating a virtual interrupt with the HW bit set without the physical interrupt being active. We could set the physical interrupt to active whenever we are about to enter the VM with a HW interrupt either pending or active, but that would be really slow, especially on GICv2. So we take the long way around and do the hard work when needed, which is expected to be extremely rare. When the VM sets the pending state for a HW interrupt on the virtual distributor we set the active state on the physical distributor, because the virtual interrupt can become active and then the guest can deactivate it. When the VM clears the pending state we also clear it on the physical side, because the injector might otherwise raise the interrupt. We also clear the physical active state when the virtual interrupt is not active, since otherwise a SPEND/CPEND sequence from the guest would prevent signaling of future interrupts. Changing the state of mapped interrupts from userspace is not supported, and it's expected that userspace unmaps devices from VFIO before attempting to set the interrupt state, because the interrupt state is driven by hardware. Reviewed-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-mmio.c | 71 +++++++++++++++++++++++++++++++++++++++---- virt/kvm/arm/vgic/vgic.c | 7 +++++ virt/kvm/arm/vgic/vgic.h | 1 + 3 files changed, 73 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index fdad95f62fa3..83d82bd7dc4e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "vgic.h" @@ -143,10 +144,22 @@ static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void) return vcpu; } +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->pending_latch = true; + vgic_irq_set_phys_active(irq, true); +} + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + bool is_uaccess = !vgic_get_mmio_requester_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -155,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = true; - + if (irq->hw) + vgic_hw_irq_spending(vcpu, irq, is_uaccess); + else + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->pending_latch = false; + + /* + * We don't want the guest to effectively mask the physical + * interrupt by doing a write to SPENDR followed by a write to + * CPENDR for HW interrupts, so we clear the active state on + * the physical side if the virtual interrupt is not active. + * This may lead to taking an additional interrupt on the + * host, but that should not be a problem as the worst that + * can happen is an additional vgic injection. We also clear + * the pending state to maintain proper semantics for edge HW + * interrupts. 
+ */ + vgic_irq_set_phys_pending(irq, false); + if (!irq->active) + vgic_irq_set_phys_active(irq, false); +} + void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + bool is_uaccess = !vgic_get_mmio_requester_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -175,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = false; + if (irq->hw) + vgic_hw_irq_cpending(vcpu, irq, is_uaccess); + else + irq->pending_latch = false; spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); @@ -202,8 +246,19 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, return value; } +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active, bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->active = active; + vgic_irq_set_phys_active(irq, active); +} + static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool new_active_state) + bool active) { unsigned long flags; struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); @@ -231,8 +286,12 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); - irq->active = new_active_state; - if (new_active_state) + if (irq->hw) + vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + else + irq->active = active; + + if (irq->active) vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else spin_unlock_irqrestore(&irq->irq_lock, flags); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index d57b3bf8eb36..c7c5ef190afa 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -144,6 +144,13 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) kfree(irq); } +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) +{ + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + pending)); +} + bool vgic_get_phys_line_level(struct vgic_irq *irq) { bool line_level; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index d0787983a357..12c37b89f7a3 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -146,6 +146,7 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags); -- cgit v1.2.3 From 4c60e360d6dfa4d9c3586b687f348eeb3fd675dd Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 27 Oct 2017 19:34:30 +0200 Subject: KVM: arm/arm64: Provide a get_input_level for the arch timer The VGIC can now support the life-cycle of mapped level-triggered interrupts, and we no longer have to read back the timer state on every exit from the VM if we had an asserted timer interrupt signal, because the VGIC already knows if we hit the unlikely case where the guest disables the timer without ACKing the virtual timer interrupt. 
This means we rework a bit of the code to factor out the functionality to snapshot the timer state from vtimer_save_state(), and we can reuse this functionality in the sync path when we have an irqchip in userspace, and also to support our implementation of the get_input_level() function for the timer. This change also means that we can no longer rely on the timer's view of the interrupt line to set the active state, because we no longer maintain this state for mapped interrupts when exiting from the guest. Instead, we only set the active state if the virtual interrupt is active, and otherwise we simply let the timer fire again and raise the virtual interrupt from the ISR. Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 2 ++ virt/kvm/arm/arch_timer.c | 84 ++++++++++++++++++++------------------------ 2 files changed, 40 insertions(+), 46 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6e45608b2399..b1dcfde0a3ef 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -90,6 +90,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); +bool kvm_arch_timer_get_input_level(int vintid); + #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index fb0533ed903d..d845d67b7062 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -343,6 +343,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) phys_timer_emulate(vcpu); } +static void __timer_snapshot_state(struct arch_timer_context *timer) +{ + timer->cnt_ctl = read_sysreg_el0(cntv_ctl); + timer->cnt_cval = read_sysreg_el0(cntv_cval); +} + static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -354,10 +360,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) if (!vtimer->loaded) goto out; - if (timer->enabled) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - } + if (timer->enabled) + __timer_snapshot_state(vtimer); /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); @@ -454,8 +458,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) bool phys_active; int ret; - phys_active = vtimer->irq.level || - kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); ret = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, @@ -535,54 +538,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) set_cntvoff(0); } -static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) +/* + * With a userspace irqchip we have to check if the guest de-asserted the + * timer and if so, unmask the timer irq signal on the host interrupt + * controller to ensure that we see future timer signals. + */ +static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { - kvm_vtimer_update_mask_user(vcpu); - return; - } - - /* - * If the guest disabled the timer without acking the interrupt, then - * we must make sure the physical and virtual active states are in - * sync by deactivating the physical interrupt, because otherwise we - * wouldn't see the next timer interrupt in the host. 
- */ - if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) { - int ret; - ret = irq_set_irqchip_state(host_vtimer_irq, - IRQCHIP_STATE_ACTIVE, - false); - WARN_ON(ret); + __timer_snapshot_state(vtimer); + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + kvm_vtimer_update_mask_user(vcpu); + } } } -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if any of the timers have expired while we were running in the guest, - * and inject an interrupt if that was the case. - */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - /* - * If we entered the guest with the vtimer output asserted we have to - * check if the guest has modified the timer so that we should lower - * the line at this point. - */ - if (vtimer->irq.level) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - if (!kvm_timer_should_fire(vtimer)) { - kvm_timer_update_irq(vcpu, false, vtimer); - unmask_vtimer_irq(vcpu); - } - } + unmask_vtimer_irq_user(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) @@ -813,6 +789,22 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) return true; } +bool kvm_arch_timer_get_input_level(int vintid) +{ + struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu(); + struct arch_timer_context *timer; + + if (vintid == vcpu_vtimer(vcpu)->irq.irq) + timer = vcpu_vtimer(vcpu); + else + BUG(); /* We only map the vtimer so far */ + + if (timer->loaded) + __timer_snapshot_state(timer); + + return kvm_timer_should_fire(timer); +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -835,7 +827,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) } ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, - NULL); + kvm_arch_timer_get_input_level); if (ret) return ret; -- cgit v1.2.3 From 61bbe38027334780964e4b2658f722ed0b3cb2f9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 27 Oct 2017 19:57:51 +0200 Subject: KVM: arm/arm64: Avoid work when userspace iqchips are not used We currently check if the VM has a userspace irqchip in several places along the critical path, and if so, we do some work which is only required for having an irqchip in userspace. This is unfortunate, as we could avoid doing any work entirely, if we didn't have to support irqchip in userspace. Realizing the userspace irqchip on ARM is mostly a developer or hobby feature, and is unlikely to be used in servers or other scenarios where performance is a priority, we can use a refcounted static key to only check the irqchip configuration when we have at least one VM that uses an irqchip in userspace. 
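The refcounted static key boils down to the pattern below, condensed from the hunks that follow; until the first userspace-irqchip VM is created, the hot-path check is a patched-out no-op branch. These are illustrative excerpts, not a complete function.

DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

	/* First run of a VCPU in a VM without an in-kernel irqchip: */
	if (!irqchip_in_kernel(kvm))
		static_branch_inc(&userspace_irqchip_in_use);

	/* Hot path: only do the extra work if such a VM exists at all. */
	if (static_branch_unlikely(&userspace_irqchip_in_use))
		kvm_timer_sync_hwstate(vcpu);

	/* VCPU destruction drops the reference again: */
	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
		static_branch_dec(&userspace_irqchip_in_use);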
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 2 ++ virt/kvm/arm/arch_timer.c | 6 ++-- virt/kvm/arm/arm.c | 59 ++++++++++++++++++++++++++++----------- 4 files changed, 50 insertions(+), 19 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index a9f7d3f47134..6394fb99da7f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -48,6 +48,8 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ea6cb5b24258..e7218cf7df2a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -47,6 +47,8 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index d845d67b7062..cfcd0323deab 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -103,7 +103,8 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) if (kvm_timer_irq_can_fire(vtimer)) kvm_timer_update_irq(vcpu, true, vtimer); - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + if (static_branch_unlikely(&userspace_irqchip_in_use) && + unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_vtimer_update_mask_user(vcpu); return IRQ_HANDLED; @@ -284,7 +285,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - if (likely(irqchip_in_kernel(vcpu->kvm))) { + if (!static_branch_unlikely(&userspace_irqchip_in_use) && + likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level, diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3610e132df8b..59d8e04c19fa 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -74,6 +74,8 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) __this_cpu_write(kvm_arm_running_vcpu, vcpu); } +DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + /** * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. * Must be called from non-preemptible context @@ -302,6 +304,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + static_branch_dec(&userspace_irqchip_in_use); kvm_arch_vcpu_free(vcpu); } @@ -522,14 +526,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; - /* - * Map the VGIC hardware resources before running a vcpu the first - * time on this VM. - */ - if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { - ret = kvm_vgic_map_resources(kvm); - if (ret) - return ret; + if (likely(irqchip_in_kernel(kvm))) { + /* + * Map the VGIC hardware resources before running a vcpu the + * first time on this VM. 
+ */ + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; + } + } else { + /* + * Tell the rest of the code that there are userspace irqchip + * VMs in the wild. + */ + static_branch_inc(&userspace_irqchip_in_use); } ret = kvm_timer_enable(vcpu); @@ -664,18 +676,29 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vgic_flush_hwstate(vcpu); /* - * If we have a singal pending, or need to notify a userspace - * irqchip about timer or PMU level changes, then we exit (and - * update the timer level state in kvm_timer_update_run - * below). + * Exit if we have a signal pending so that we can deliver the + * signal to user space. */ - if (signal_pending(current) || - kvm_timer_should_notify_user(vcpu) || - kvm_pmu_should_notify_user(vcpu)) { + if (signal_pending(current)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; } + /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + } + /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. @@ -688,7 +711,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); preempt_enable(); @@ -732,7 +756,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * we don't want vtimer interrupts to race with syncing the * timer virtual interrupt state. */ - kvm_timer_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); /* * We may have taken a host interrupt in HYP mode (ie -- cgit v1.2.3 From d68119864ef4b253a585a1c897cda6936d4b5de9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:14 +0100 Subject: KVM: arm/arm64: Detangle kvm_mmu.h from kvm_hyp.h kvm_hyp.h has an odd dependency on kvm_mmu.h, which makes the opposite inclusion impossible. Let's start with breaking that useless dependency. 
Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_hyp.h | 1 - arch/arm/kvm/hyp/switch.c | 1 + arch/arm/kvm/hyp/tlb.c | 1 + arch/arm64/include/asm/kvm_hyp.h | 1 - arch/arm64/kvm/hyp/debug-sr.c | 1 + arch/arm64/kvm/hyp/switch.c | 1 + arch/arm64/kvm/hyp/tlb.c | 1 + virt/kvm/arm/hyp/vgic-v2-sr.c | 1 + 8 files changed, 6 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index ab20ffa8b9e7..76368de7237b 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #define __hyp_text __section(.hyp.text) notrace diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 330c9ce34ba5..ae45ae96aac2 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -18,6 +18,7 @@ #include #include +#include __asm__(".arch_extension virt"); diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index 6d810af2d9fd..c0edd450e104 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -19,6 +19,7 @@ */ #include +#include /** * Flush per-VMID TLBs diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 08d3bb66c8b7..f26f9cd70c72 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -20,7 +20,6 @@ #include #include -#include #include #define __hyp_text __section(.hyp.text) notrace diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 321c9c05dd9e..360455f86346 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -21,6 +21,7 @@ #include #include #include +#include #define read_debug(r,n) read_sysreg(r##n##_el1) #define write_debug(v,r,n) write_sysreg(v, r##n##_el1) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index f7c651f3a8c0..f3d8bed096f5 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 73464a96c365..131c7772703c 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -16,6 +16,7 @@ */ #include +#include #include static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index d7fd46fe9efb..4fe6e797e8b3 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -21,6 +21,7 @@ #include #include +#include static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { -- cgit v1.2.3 From a15f693935a9f1fec8241cafaca27be4483d4464 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:15 +0100 Subject: KVM: arm/arm64: Split dcache/icache flushing As we're about to introduce opportunistic invalidation of the icache, let's split dcache and icache flushing. 
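In practice the split means the stage-2 fault handler issues the two maintenance operations separately, which is what allows later patches in the series to make the icache part conditional. Condensed from the mmu.c hunk further down, for illustration only:

	/* Previously a single coherent_cache_guest_page(vcpu, pfn, size) call: */
	clean_dcache_guest_page(vcpu, pfn, PMD_SIZE);
	invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE);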
Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 60 ++++++++++++++++++++++++++++------------ arch/arm64/include/asm/kvm_mmu.h | 13 +++++++-- virt/kvm/arm/mmu.c | 20 ++++++++++---- 3 files changed, 67 insertions(+), 26 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index fa6f2174276b..9fa4b2520974 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -126,21 +126,12 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101; } -static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, - unsigned long size) +static inline void __clean_dcache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, + unsigned long size) { /* - * If we are going to insert an instruction page and the icache is - * either VIPT or PIPT, there is a potential problem where the host - * (or another VM) may have used the same page as this guest, and we - * read incorrect data from the icache. If we're using a PIPT cache, - * we can invalidate just that page, but if we are using a VIPT cache - * we need to invalidate the entire icache - damn shame - as written - * in the ARM ARM (DDI 0406C.b - Page B3-1393). - * - * VIVT caches are tagged using both the ASID and the VMID and doesn't - * need any kind of flushing (DDI 0406C.b - Page B3-1392). + * Clean the dcache to the Point of Coherency. * * We need to do this through a kernel mapping (using the * user-space mapping has proved to be the wrong @@ -155,19 +146,52 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_flush_dcache_to_poc(va, PAGE_SIZE); - if (icache_is_pipt()) - __cpuc_coherent_user_range((unsigned long)va, - (unsigned long)va + PAGE_SIZE); - size -= PAGE_SIZE; pfn++; kunmap_atomic(va); } +} - if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { +static inline void __invalidate_icache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, + unsigned long size) +{ + /* + * If we are going to insert an instruction page and the icache is + * either VIPT or PIPT, there is a potential problem where the host + * (or another VM) may have used the same page as this guest, and we + * read incorrect data from the icache. If we're using a PIPT cache, + * we can invalidate just that page, but if we are using a VIPT cache + * we need to invalidate the entire icache - damn shame - as written + * in the ARM ARM (DDI 0406C.b - Page B3-1393). + * + * VIVT caches are tagged using both the ASID and the VMID and doesn't + * need any kind of flushing (DDI 0406C.b - Page B3-1392). + */ + + VM_BUG_ON(size & ~PAGE_MASK); + + if (icache_is_vivt_asid_tagged()) + return; + + if (!icache_is_pipt()) { /* any kind of VIPT cache */ __flush_icache_all(); + return; + } + + /* PIPT cache. As for the d-side, use a temporary kernel mapping. 
*/ + while (size) { + void *va = kmap_atomic_pfn(pfn); + + __cpuc_coherent_user_range((unsigned long)va, + (unsigned long)va + PAGE_SIZE); + + size -= PAGE_SIZE; + pfn++; + + kunmap_atomic(va); } } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 672c8684d5c2..8034b96fb3a4 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -230,19 +230,26 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, - unsigned long size) +static inline void __clean_dcache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, + unsigned long size) { void *va = page_address(pfn_to_page(pfn)); kvm_flush_dcache_to_poc(va, size); +} +static inline void __invalidate_icache_guest_page(struct kvm_vcpu *vcpu, + kvm_pfn_t pfn, + unsigned long size) +{ if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + void *va = page_address(pfn_to_page(pfn)); + flush_icache_range((unsigned long)va, (unsigned long)va + size); } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..2174244f6317 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1257,10 +1257,16 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, - unsigned long size) +static void clean_dcache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, + unsigned long size) { - __coherent_cache_guest_page(vcpu, pfn, size); + __clean_dcache_guest_page(vcpu, pfn, size); +} + +static void invalidate_icache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, + unsigned long size) +{ + __invalidate_icache_guest_page(vcpu, pfn, size); } static void kvm_send_hwpoison_signal(unsigned long address, @@ -1391,7 +1397,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, new_pmd = kvm_s2pmd_mkwrite(new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, pfn, PMD_SIZE); + clean_dcache_guest_page(vcpu, pfn, PMD_SIZE); + invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -1401,7 +1409,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_pfn_dirty(pfn); mark_page_dirty(kvm, gfn); } - coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE); + clean_dcache_guest_page(vcpu, pfn, PAGE_SIZE); + invalidate_icache_guest_page(vcpu, pfn, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); } -- cgit v1.2.3 From d0e22b4ac3ba23c611739f554392bf5e217df49f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:19 +0100 Subject: KVM: arm/arm64: Limit icache invalidation to prefetch aborts We've so far eagerly invalidated the icache, no matter how the page was faulted in (data or prefetch abort). But we can easily track execution by setting the XN bits in the S2 page tables, get the prefetch abort at HYP and perform the icache invalidation at that time only. As for most VMs, the instruction working set is pretty small compared to the data set, this is likely to save some traffic (specially as the invalidation is broadcast). 
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 12 ++++++++++++ arch/arm/include/asm/pgtable.h | 4 ++-- arch/arm64/include/asm/kvm_mmu.h | 12 ++++++++++++ arch/arm64/include/asm/pgtable-prot.h | 4 ++-- virt/kvm/arm/mmu.c | 19 +++++++++++++++---- 5 files changed, 43 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index bc8d21e76637..4d7a54cbb3ab 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -85,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) return pmd; } +static inline pte_t kvm_s2pte_mkexec(pte_t pte) +{ + pte_val(pte) &= ~L_PTE_XN; + return pte; +} + +static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) +{ + pmd_val(pmd) &= ~PMD_SECT_XN; + return pmd; +} + static inline void kvm_set_s2pte_readonly(pte_t *pte) { pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 150ece66ddf3..a757401129f9 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -102,8 +102,8 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY) #define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) -#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY) +#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 56b3e03c85e7..1e1b20cb348f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -173,6 +173,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) return pmd; } +static inline pte_t kvm_s2pte_mkexec(pte_t pte) +{ + pte_val(pte) &= ~PTE_S2_XN; + return pte; +} + +static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) +{ + pmd_val(pmd) &= ~PMD_S2_XN; + return pmd; +} + static inline void kvm_set_s2pte_readonly(pte_t *pte) { pteval_t old_pteval, pteval; diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 0a5635fb0ef9..4e12dabd342b 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -60,8 +60,8 @@ #define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) -#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) +#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2174244f6317..0417c8e2a81c 100644 --- 
a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1292,7 +1292,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long fault_status) { int ret; - bool write_fault, writable, hugetlb = false, force_pte = false; + bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; @@ -1304,7 +1304,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long flags = 0; write_fault = kvm_is_write_fault(vcpu); - if (fault_status == FSC_PERM && !write_fault) { + exec_fault = kvm_vcpu_trap_is_iabt(vcpu); + VM_BUG_ON(write_fault && exec_fault); + + if (fault_status == FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } @@ -1398,7 +1401,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_pfn_dirty(pfn); } clean_dcache_guest_page(vcpu, pfn, PMD_SIZE); - invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE); + + if (exec_fault) { + new_pmd = kvm_s2pmd_mkexec(new_pmd); + invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE); + } ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { @@ -1410,7 +1417,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mark_page_dirty(kvm, gfn); } clean_dcache_guest_page(vcpu, pfn, PAGE_SIZE); - invalidate_icache_guest_page(vcpu, pfn, PAGE_SIZE); + + if (exec_fault) { + new_pte = kvm_s2pte_mkexec(new_pte); + invalidate_icache_guest_page(vcpu, pfn, PAGE_SIZE); + } ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); } -- cgit v1.2.3 From a9c0e12ebee56ef06b7eccdbc73bab71d0018df8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:20 +0100 Subject: KVM: arm/arm64: Only clean the dcache on translation fault The only case where we actually need to perform a dcache maintenance is when we map the page for the first time, and subsequent permission faults do not require cache maintenance. Let's make it conditional on not being a permission fault (and thus a translation fault). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 0417c8e2a81c..f956efbd933d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1400,7 +1400,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, new_pmd = kvm_s2pmd_mkwrite(new_pmd); kvm_set_pfn_dirty(pfn); } - clean_dcache_guest_page(vcpu, pfn, PMD_SIZE); + + if (fault_status != FSC_PERM) + clean_dcache_guest_page(vcpu, pfn, PMD_SIZE); if (exec_fault) { new_pmd = kvm_s2pmd_mkexec(new_pmd); @@ -1416,7 +1418,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_pfn_dirty(pfn); mark_page_dirty(kvm, gfn); } - clean_dcache_guest_page(vcpu, pfn, PAGE_SIZE); + + if (fault_status != FSC_PERM) + clean_dcache_guest_page(vcpu, pfn, PAGE_SIZE); if (exec_fault) { new_pte = kvm_s2pte_mkexec(new_pte); -- cgit v1.2.3 From 7a3796d2ef5bb948f709467eef1bf96edbfc67a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:21 +0100 Subject: KVM: arm/arm64: Preserve Exec permission across R/W permission faults So far, we loose the Exec property whenever we take permission faults, as we always reconstruct the PTE/PMD from scratch. 
This can be counterproductive as we can end up with the following fault sequence: X -> RO -> ROX -> RW -> RWX Instead, we can look up the existing PTE/PMD and clear the XN bit in the new entry if it was already cleared in the old one, leading to a much nicer fault sequence: X -> ROX -> RWX Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 10 ++++++++++ arch/arm64/include/asm/kvm_mmu.h | 10 ++++++++++ virt/kvm/arm/mmu.c | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 4d7a54cbb3ab..aab64fe52146 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -107,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte) return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; } +static inline bool kvm_s2pte_exec(pte_t *pte) +{ + return !(pte_val(*pte) & L_PTE_XN); +} + static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) { pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; @@ -117,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; } +static inline bool kvm_s2pmd_exec(pmd_t *pmd) +{ + return !(pmd_val(*pmd) & PMD_SECT_XN); +} + static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 1e1b20cb348f..126abefffe7f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -203,6 +203,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte) return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; } +static inline bool kvm_s2pte_exec(pte_t *pte) +{ + return !(pte_val(*pte) & PTE_S2_XN); +} + static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) { kvm_set_s2pte_readonly((pte_t *)pmd); @@ -213,6 +218,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) { return kvm_s2pte_readonly((pte_t *)pmd); } +static inline bool kvm_s2pmd_exec(pmd_t *pmd) +{ + return !(pmd_val(*pmd) & PMD_S2_XN); +} + static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f956efbd933d..b83b5a8442bb 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -926,6 +926,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache return 0; } +static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) +{ + pmd_t *pmdp; + pte_t *ptep; + + pmdp = stage2_get_pmd(kvm, NULL, addr); + if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp)) + return false; + + if (pmd_thp_or_huge(*pmdp)) + return kvm_s2pmd_exec(pmdp); + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep || pte_none(*ptep) || !pte_present(*ptep)) + return false; + + return kvm_s2pte_exec(ptep); +} + static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) @@ -1407,6 +1426,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault) { new_pmd = kvm_s2pmd_mkexec(new_pmd); invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE); + } else if (fault_status == FSC_PERM) { + /* Preserve execute if XN was already cleared */ + if (stage2_is_exec(kvm, fault_ipa)) + new_pmd = kvm_s2pmd_mkexec(new_pmd); } ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); @@ -1425,6 +1448,10 @@ static int
user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault) { new_pte = kvm_s2pte_mkexec(new_pte); invalidate_icache_guest_page(vcpu, pfn, PAGE_SIZE); + } else if (fault_status == FSC_PERM) { + /* Preserve execute if XN was already cleared */ + if (stage2_is_exec(kvm, fault_ipa)) + new_pte = kvm_s2pte_mkexec(new_pte); } ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); -- cgit v1.2.3 From 17ab9d57debaa53d665651e425a0efc4a893c039 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Oct 2017 17:11:22 +0100 Subject: KVM: arm/arm64: Drop vcpu parameter from guest cache maintenance operations The vcpu parameter isn't used for anything, and gets in the way of further cleanups. Let's get rid of it. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 7 ++----- arch/arm64/include/asm/kvm_mmu.h | 7 ++----- virt/kvm/arm/mmu.c | 18 ++++++++---------- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index aab64fe52146..bc70a1f0f42d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -150,9 +150,7 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, - unsigned long size) +static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { /* * Clean the dcache to the Point of Coherency. @@ -177,8 +175,7 @@ static inline void __clean_dcache_guest_page(struct kvm_vcpu *vcpu, } } -static inline void __invalidate_icache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, +static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) { u32 iclsz; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 126abefffe7f..06f1f9794679 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -252,17 +252,14 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, - unsigned long size) +static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { void *va = page_address(pfn_to_page(pfn)); kvm_flush_dcache_to_poc(va, size); } -static inline void __invalidate_icache_guest_page(struct kvm_vcpu *vcpu, - kvm_pfn_t pfn, +static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) { if (icache_is_aliasing()) { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b83b5a8442bb..a1ea43fa75cf 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1276,16 +1276,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void clean_dcache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, - unsigned long size) +static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { - __clean_dcache_guest_page(vcpu, pfn, size); + __clean_dcache_guest_page(pfn, size); } -static void invalidate_icache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, - unsigned long size) +static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) { - __invalidate_icache_guest_page(vcpu, pfn, size); + __invalidate_icache_guest_page(pfn, size); } static void
kvm_send_hwpoison_signal(unsigned long address, @@ -1421,11 +1419,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (fault_status != FSC_PERM) - clean_dcache_guest_page(vcpu, pfn, PMD_SIZE); + clean_dcache_guest_page(pfn, PMD_SIZE); if (exec_fault) { new_pmd = kvm_s2pmd_mkexec(new_pmd); - invalidate_icache_guest_page(vcpu, pfn, PMD_SIZE); + invalidate_icache_guest_page(pfn, PMD_SIZE); } else if (fault_status == FSC_PERM) { /* Preserve execute if XN was already cleared */ if (stage2_is_exec(kvm, fault_ipa)) @@ -1443,11 +1441,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (fault_status != FSC_PERM) - clean_dcache_guest_page(vcpu, pfn, PAGE_SIZE); + clean_dcache_guest_page(pfn, PAGE_SIZE); if (exec_fault) { new_pte = kvm_s2pte_mkexec(new_pte); - invalidate_icache_guest_page(vcpu, pfn, PAGE_SIZE); + invalidate_icache_guest_page(pfn, PAGE_SIZE); } else if (fault_status == FSC_PERM) { /* Preserve execute if XN was already cleared */ if (stage2_is_exec(kvm, fault_ipa)) -- cgit v1.2.3 From 58d6b15e9da5042a99c9c30ad725792e4569150e Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jan 2018 18:19:06 +0000 Subject: KVM: arm/arm64: Handle CPU_PM_ENTER_FAILED cpu_pm_enter() calls the pm notifier chain with CPU_PM_ENTER, then if there is a failure: CPU_PM_ENTER_FAILED. When KVM receives CPU_PM_ENTER it calls cpu_hyp_reset() which will return us to the hyp-stub. If we subsequently get a CPU_PM_ENTER_FAILED, KVM does nothing, leaving the CPU running with the hyp-stub, at odds with kvm_arm_hardware_enabled. Add CPU_PM_ENTER_FAILED as a fallthrough for CPU_PM_EXIT, this reloads KVM based on kvm_arm_hardware_enabled. This is safe even if CPU_PM_ENTER never gets as far as KVM, as cpu_hyp_reinit() calls cpu_hyp_reset() to make sure the hyp-stub is loaded before reloading KVM. Fixes: 67f691976662 ("arm64: kvm: allows kvm cpu hotplug") Cc: # v4.7+ CC: Lorenzo Pieralisi Reviewed-by: Christoffer Dall Signed-off-by: James Morse Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 59d8e04c19fa..639dca0c0560 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1262,6 +1262,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. */ -- cgit v1.2.3 From 13e59ece5b30f39e4e1e1fac2b2ddc7ed527f3cc Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 25 Jan 2018 14:20:19 +0100 Subject: KVM: arm/arm64: Fix incorrect timer_is_pending logic After the recently introduced support for level-triggered mapped interrupt, I accidentally left the VCPU thread busily going back and forward between the guest and the hypervisor whenever the guest was blocking, because I would always incorrectly report that a timer interrupt was pending. This is because the timer->irq.level field is not valid for mapped interrupts, where we offload the level state to the hardware, and as a result this field is always true. Luckily the problem can be relatively easily solved by not checking the cached signal state of either timer in kvm_timer_should_fire() but instead compute the timer state on the fly, which we do already if the cached signal state wasn't high. 
In fact, the only reason for checking the cached signal state was a tiny optimization which would only be potentially faster when the polling loop detects a pending timer interrupt, which is quite unlikely. Instead of duplicating the logic from kvm_arch_timer_handler(), we enlighten kvm_timer_should_fire() to report something valid when the timer state is loaded onto the hardware. We can then call this from kvm_arch_timer_handler() as well and avoid the call to __timer_snapshot_state() in kvm_arch_timer_get_input_level(). Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index cfcd0323deab..63cf828f3c4f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -97,10 +97,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); return IRQ_NONE; } - vtimer = vcpu_vtimer(vcpu); - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - if (kvm_timer_irq_can_fire(vtimer)) + vtimer = vcpu_vtimer(vcpu); + if (kvm_timer_should_fire(vtimer)) kvm_timer_update_irq(vcpu, true, vtimer); if (static_branch_unlikely(&userspace_irqchip_in_use) && @@ -230,6 +229,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { u64 cval, now; + if (timer_ctx->loaded) { + u32 cnt_ctl; + + /* Only the virtual timer can be loaded so far */ + cnt_ctl = read_sysreg_el0(cntv_ctl); + return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && + (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && + !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); + } + if (!kvm_timer_irq_can_fire(timer_ctx)) return false; @@ -244,15 +253,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - if (vtimer->irq.level || ptimer->irq.level) - return true; - - /* - * When this is called from withing the wait loop of kvm_vcpu_block(), - * the software view of the timer state is up to date (timer->loaded - * is false), and so we can simply check if the timer should fire now. 
- */ - if (!vtimer->loaded && kvm_timer_should_fire(vtimer)) + if (kvm_timer_should_fire(vtimer)) return true; return kvm_timer_should_fire(ptimer); @@ -270,9 +271,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu) /* Populate the device bitmap with the timer states */ regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER); - if (vtimer->irq.level) + if (kvm_timer_should_fire(vtimer)) regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; - if (ptimer->irq.level) + if (kvm_timer_should_fire(ptimer)) regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; } @@ -507,8 +508,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; - return vtimer->irq.level != vlevel || - ptimer->irq.level != plevel; + return kvm_timer_should_fire(vtimer) != vlevel || + kvm_timer_should_fire(ptimer) != plevel; } void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) @@ -801,9 +802,6 @@ bool kvm_arch_timer_get_input_level(int vintid) else BUG(); /* We only map the vtimer so far */ - if (timer->loaded) - __timer_snapshot_state(timer); - return kvm_timer_should_fire(timer); } -- cgit v1.2.3 From f1d7231cede93a42d75b6d3c5ca599e94a273e89 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 25 Jan 2018 18:32:29 +0100 Subject: KVM: arm/arm64: Fix userspace_irqchip_in_use counting We were not decrementing the static key count in the right location. kvm_arch_vcpu_destroy() is only called to clean up after a failed VCPU create attempt, whereas kvm_arch_vcpu_free() is called on teardown of the VM as well. Move the static key decrement call to kvm_arch_vcpu_free(). Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 639dca0c0560..04ee7a327870 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + static_branch_dec(&userspace_irqchip_in_use); + kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); @@ -304,8 +307,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) - static_branch_dec(&userspace_irqchip_in_use); kvm_arch_vcpu_free(vcpu); } -- cgit v1.2.3 From cd15d2050c044ca9525ba165e9073ac8e036b8d0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 26 Jan 2018 16:20:22 +0100 Subject: KVM: arm/arm64: Fixup userspace irqchip static key optimization When I introduced a static key to avoid work in the critical path for userspace irqchips which is very rarely used, I accidentally messed up my logic and used && where I should have used ||, because the point was to short-circuit the evaluation in case userspace irqchips weren't even in use. This fixes an issue when running in-kernel irqchip VMs alongside userspace irqchip VMs. 
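A minimal illustration of the short-circuit the fix restores; the two conditions below are the ones from kvm_timer_update_irq() in the diff that follows, and the annotations are an editorial reading rather than text from the patch.

	/*
	 * Buggy:  !static_branch_unlikely(&userspace_irqchip_in_use) &&
	 *         likely(irqchip_in_kernel(vcpu->kvm))
	 *
	 *   As soon as any VM on the host uses a userspace irqchip, the static
	 *   key is enabled, the first operand is false, and the && makes the
	 *   whole condition false -- so even VMs with an in-kernel irqchip
	 *   stop injecting the timer interrupt through the vgic.
	 *
	 * Fixed:  !static_branch_unlikely(&userspace_irqchip_in_use) ||
	 *         likely(irqchip_in_kernel(vcpu->kvm))
	 *
	 *   With no userspace irqchips anywhere, the first operand is true and
	 *   the per-VM check is skipped entirely (the intended fast path);
	 *   otherwise the decision falls back to irqchip_in_kernel() for the
	 *   current VM, regardless of what other VMs on the host are doing.
	 */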
Acked-by: Marc Zyngier Fixes: c44c232ee2d3 ("KVM: arm/arm64: Avoid work when userspace iqchips are not used") Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 63cf828f3c4f..fb6bd9b9845e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -286,7 +286,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - if (!static_branch_unlikely(&userspace_irqchip_in_use) && + if (!static_branch_unlikely(&userspace_irqchip_in_use) || likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, -- cgit v1.2.3 From a340b3e229b24a56f1c7f5826b15a3af0f4b13e5 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Wed, 17 Jan 2018 19:18:56 +0100 Subject: kvm: Map PFN-type memory regions as writable (if possible) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For EPT-violations that are triggered by a read, the pages are also mapped with write permissions (if their memory region is also writable). That would avoid getting yet another fault on the same page when a write occurs. This optimization only happens when you have a "struct page" backing the memory region. So also enable it for memory regions that do not have a "struct page". Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: KarimAllah Ahmed Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b4414842b023..8af42eab126d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1428,7 +1428,8 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, - bool write_fault, kvm_pfn_t *p_pfn) + bool write_fault, bool *writable, + kvm_pfn_t *p_pfn) { unsigned long pfn; int r; @@ -1454,6 +1455,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, } + if (writable) + *writable = true; /* * Get a reference here because callers of *hva_to_pfn* and @@ -1519,7 +1522,7 @@ retry: if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); + r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn); if (r == -EAGAIN) goto retry; if (r < 0) -- cgit v1.2.3 From e46b469278a59781f9b25ff608af84892963821b Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Sat, 20 Jan 2018 04:04:22 +0900 Subject: kvm: embed vcpu id to dentry of vcpu anon inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All d-entries for vcpu have the same, "anon_inode:kvm-vcpu". That means it is impossible to know the mapping between fds for vcpu and vcpu from userland. # LC_ALL=C ls -l /proc/617/fd | grep vcpu lrwx------. 1 qemu qemu 64 Jan 7 16:50 18 -> anon_inode:kvm-vcpu lrwx------. 1 qemu qemu 64 Jan 7 16:50 19 -> anon_inode:kvm-vcpu It is also impossible to know the mapping between vma for kvm_run structure and vcpu from userland. 
# LC_ALL=C grep vcpu /proc/617/maps 7f9d842d0000-7f9d842d3000 rw-s 00000000 00:0d 20393 anon_inode:kvm-vcpu 7f9d842d3000-7f9d842d6000 rw-s 00000000 00:0d 20393 anon_inode:kvm-vcpu This change adds vcpu id to d-entries for vcpu. With this change you can get the following output: # LC_ALL=C ls -l /proc/617/fd | grep vcpu lrwx------. 1 qemu qemu 64 Jan 7 16:50 18 -> anon_inode:kvm-vcpu:0 lrwx------. 1 qemu qemu 64 Jan 7 16:50 19 -> anon_inode:kvm-vcpu:1 # LC_ALL=C grep vcpu /proc/617/maps 7f9d842d0000-7f9d842d3000 rw-s 00000000 00:0d 20393 anon_inode:kvm-vcpu:0 7f9d842d3000-7f9d842d6000 rw-s 00000000 00:0d 20393 anon_inode:kvm-vcpu:1 With the mappings known from the output, a tool like strace can report more details of qemu-kvm process activities. Here is the strace output of my local prototype: # ./strace -KK -f -p 617 2>&1 | grep 'KVM_RUN\| K' ... [pid 664] ioctl(18, KVM_RUN, 0) = 0 (KVM_EXIT_MMIO) K ready_for_interrupt_injection=1, if_flag=0, flags=0, cr8=0000000000000000, apic_base=0x000000fee00d00 K phys_addr=0, len=1634035803, [33, 0, 0, 0, 0, 0, 0, 0], is_write=112 [pid 664] ioctl(18, KVM_RUN, 0) = 0 (KVM_EXIT_MMIO) K ready_for_interrupt_injection=1, if_flag=1, flags=0, cr8=0000000000000000, apic_base=0x000000fee00d00 K phys_addr=0, len=1634035803, [33, 0, 0, 0, 0, 0, 0, 0], is_write=112 ... Signed-off-by: Masatake YAMATO Acked-by: Christian Borntraeger Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8af42eab126d..8a937b7cde35 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2415,7 +2415,10 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); + char name[8 + 1 + ITOA_MAX_LEN + 1]; + + snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); + return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -- cgit v1.2.3
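As a closing note on the name buffer used above, the sizing expression can be read as follows; this breakdown is an editorial reading of the snprintf() format, not text from the patch.

	char name[8 + 1 + ITOA_MAX_LEN + 1];
	/*
	 *   8             strlen("kvm-vcpu")
	 *   1             the ':' separator
	 *   ITOA_MAX_LEN  presumably an upper bound on the decimal digits of
	 *                 vcpu_id (the %d argument)
	 *   1             trailing NUL always written by snprintf()
	 */
	snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);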