From bf998156d24bcb127318ad5bf531ac3bdfcd6449 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 31 May 2010 14:28:19 +0800 Subject: KVM: Avoid killing userspace through guest SRAO MCE on unmapped pages In common cases, a guest SRAO MCE will cause the corresponding poisoned page to be unmapped and SIGBUS to be sent to QEMU-KVM; QEMU-KVM will then relay the MCE to the guest OS. But it is reported that if the poisoned page is accessed in the guest after unmapping and before the MCE is relayed to the guest OS, userspace will be killed. The reason is as follows. Because the poisoned page has been unmapped, the guest access will cause a guest exit and kvm_mmu_page_fault will be called. kvm_mmu_page_fault cannot get the poisoned page for the fault address, so kernel and user space MMIO processing are tried in turn. In user MMIO processing, the poisoned page is accessed again, and userspace is then killed by force_sig_info. To fix the bug, kvm_mmu_page_fault sends the HWPOISON signal to QEMU-KVM and does not try kernel and user space MMIO processing for a poisoned page. 
[xiao: fix warning introduced by avi] Reported-by: Max Asbock Signed-off-by: Huang Ying Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- mm/memory-failure.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'mm') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 620b0b461593..378b0f61fd3c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1296,3 +1297,32 @@ done: /* keep elevated page count for bad page */ return ret; } + +int is_hwpoison_address(unsigned long addr) +{ + pgd_t *pgdp; + pud_t pud, *pudp; + pmd_t pmd, *pmdp; + pte_t pte, *ptep; + swp_entry_t entry; + + pgdp = pgd_offset(current->mm, addr); + if (!pgd_present(*pgdp)) + return 0; + pudp = pud_offset(pgdp, addr); + pud = *pudp; + if (!pud_present(pud) || pud_large(pud)) + return 0; + pmdp = pmd_offset(pudp, addr); + pmd = *pmdp; + if (!pmd_present(pmd) || pmd_large(pmd)) + return 0; + ptep = pte_offset_map(pmdp, addr); + pte = *ptep; + pte_unmap(ptep); + if (!is_swap_pte(pte)) + return 0; + entry = pte_to_swp_entry(pte); + return is_hwpoison_entry(entry); +} +EXPORT_SYMBOL_GPL(is_hwpoison_address); -- cgit v1.2.3 From bbeb34062fbad287c949a945a516a0c15b179993 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 22 Jun 2010 14:23:11 +0800 Subject: KVM: Fix a race condition for usage of is_hwpoison_address() is_hwpoison_address accesses the page table, so the caller must hold current->mm->mmap_sem in read mode. So fix its usage in hva_to_pfn of kvm accordingly. Comment is_hwpoison_address to remind other users. 
Reported-by: Avi Kivity Signed-off-by: Huang Ying Signed-off-by: Avi Kivity --- mm/memory-failure.c | 3 +++ virt/kvm/kvm_main.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 378b0f61fd3c..6b44e52cacaa 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1298,6 +1298,9 @@ done: return ret; } +/* + * The caller must hold current->mm->mmap_sem in read mode. + */ int is_hwpoison_address(unsigned long addr) { pgd_t *pgdp; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 74f731920945..ec2e3c6ac7ed 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -947,12 +947,13 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) if (unlikely(npages != 1)) { struct vm_area_struct *vma; + down_read(&current->mm->mmap_sem); if (is_hwpoison_address(addr)) { + up_read(&current->mm->mmap_sem); get_page(hwpoison_page); return page_to_pfn(hwpoison_page); } - down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma == NULL || addr < vma->vm_start || -- cgit v1.2.3