summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c85
1 files changed, 62 insertions, 23 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e9473c7c7390..e5fa335a4ea7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -296,7 +296,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, nested_run),
STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
- STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ STATS_DESC_COUNTER(VCPU, preemption_reported),
+ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_IBOOLEAN(VCPU, guest_mode)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -1615,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
+ /* Guests don't need to know "Fill buffer clear control" exists */
+ data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+
return data;
}
@@ -4625,6 +4630,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
+ /*
+ * The vCPU can be marked preempted if and only if the VM-Exit was on
+ * an instruction boundary and will not trigger guest emulation of any
+ * kind (see vcpu_run). Vendor specific code controls (conservatively)
+ * when this is true, for example allowing the vCPU to be marked
+ * preempted if and only if the VM-Exit was due to a host interrupt.
+ */
+ if (!vcpu->arch.at_instruction_boundary) {
+ vcpu->stat.preemption_other++;
+ return;
+ }
+
+ vcpu->stat.preemption_reported++;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4654,19 +4672,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
int idx;
- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+ if (vcpu->preempted) {
+ if (!vcpu->arch.guest_state_protected)
+ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
- /*
- * Take the srcu lock as memslots will be accessed to check the gfn
- * cache generation against the memslots generation.
- */
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvm_xen_msr_enabled(vcpu->kvm))
- kvm_xen_runstate_set_preempted(vcpu);
- else
- kvm_steal_time_set_preempted(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvm_xen_msr_enabled(vcpu->kvm))
+ kvm_xen_runstate_set_preempted(vcpu);
+ else
+ kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
@@ -6009,6 +6029,11 @@ split_irqchip_unlock:
r = 0;
break;
case KVM_CAP_X86_USER_SPACE_MSR:
+ r = -EINVAL;
+ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER))
+ break;
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
@@ -6163,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
+ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
@@ -9123,15 +9151,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
*/
static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
{
- struct kvm_lapic_irq lapic_irq;
-
- lapic_irq.shorthand = APIC_DEST_NOSHORT;
- lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
- lapic_irq.level = 0;
- lapic_irq.dest_id = apicid;
- lapic_irq.msi_redir_hint = false;
+ /*
+ * All other fields are unused for APIC_DM_REMRD, but may be consumed by
+ * common code, e.g. for tracing. Defer initialization to the compiler.
+ */
+ struct kvm_lapic_irq lapic_irq = {
+ .delivery_mode = APIC_DM_REMRD,
+ .dest_mode = APIC_DEST_PHYSICAL,
+ .shorthand = APIC_DEST_NOSHORT,
+ .dest_id = apicid,
+ };
- lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
@@ -9833,6 +9863,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
return;
down_read(&vcpu->kvm->arch.apicv_update_lock);
+ preempt_disable();
activate = kvm_vcpu_apicv_activated(vcpu);
@@ -9853,6 +9884,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
+ preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
@@ -10422,6 +10454,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
+ /*
+ * If another guest vCPU requests a PV TLB flush in the middle
+ * of instruction emulation, the rest of the emulation could
+ * use a stale page translation. Assume that any code after
+ * this point can start executing an instruction.
+ */
+ vcpu->arch.at_instruction_boundary = false;
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
@@ -12602,9 +12641,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);