-rw-r--r--  arch/x86_64/kernel/nmi.c      |  1
-rw-r--r--  arch/x86_64/kernel/traps.c    |  2
-rw-r--r--  arch/x86_64/mm/fault.c        | 73
-rw-r--r--  include/asm-x86_64/pgalloc.h  | 28
-rw-r--r--  include/asm-x86_64/pgtable.h  |  4
5 files changed, 95 insertions(+), 13 deletions(-)
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 66c009e10bac..d9e4067faf05 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -534,6 +534,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 
 void set_nmi_callback(nmi_callback_t callback)
 {
+        vmalloc_sync_all();
         rcu_assign_pointer(nmi_callback, callback);
 }
 
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 28d50dc540e8..b25bc904d42d 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -78,6 +78,8 @@ int register_die_notifier(struct notifier_block *nb)
 {
         int err = 0;
         unsigned long flags;
+
+        vmalloc_sync_all();
         spin_lock_irqsave(&die_notifier_lock, flags);
         err = notifier_chain_register(&die_chain, nb);
         spin_unlock_irqrestore(&die_notifier_lock, flags);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2e7c3c8ffe03..de91e17daf6f 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address)
                 return -1;
         if (pgd_none(*pgd))
                 set_pgd(pgd, *pgd_ref);
+        else
+                BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 
         /* Below here mismatches are bugs because these lower tables
            are shared */
@@ -314,16 +316,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
         /* get the address */
         __asm__("movq %%cr2,%0":"=r" (address));
 
-        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-                                        SIGSEGV) == NOTIFY_STOP)
-                return;
-
-        if (likely(regs->eflags & X86_EFLAGS_IF))
-                local_irq_enable();
-
-        if (unlikely(page_fault_trace))
-                printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
-                        regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
 
         tsk = current;
         mm = tsk->mm;
@@ -351,10 +343,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                  */
                 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
                       ((address >= VMALLOC_START && address < VMALLOC_END))) {
-                        if (vmalloc_fault(address) < 0)
-                                goto bad_area_nosemaphore;
-                        return;
+                        if (vmalloc_fault(address) >= 0)
+                                return;
                 }
+                if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+                                                SIGSEGV) == NOTIFY_STOP)
+                        return;
                 /*
                  * Don't take the mm semaphore here. If we fixup a prefetch
                  * fault we could otherwise deadlock.
@@ -362,6 +356,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                 goto bad_area_nosemaphore;
         }
 
+        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+                                        SIGSEGV) == NOTIFY_STOP)
+                return;
+
+        if (likely(regs->eflags & X86_EFLAGS_IF))
+                local_irq_enable();
+
+        if (unlikely(page_fault_trace))
+                printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+                        regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
         if (unlikely(error_code & PF_RSVD))
                 pgtable_bad(address, regs, error_code);
 
@@ -571,6 +576,48 @@ do_sigbus:
         return;
 }
 
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+void vmalloc_sync_all(void)
+{
+        /* Note that races in the updates of insync and start aren't
+           problematic:
+           insync can only get set bits added, and updates to start are only
+           improving performance (without affecting correctness if undone). */
+        static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+        static unsigned long start = VMALLOC_START & PGDIR_MASK;
+        unsigned long address;
+
+        for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+                if (!test_bit(pgd_index(address), insync)) {
+                        const pgd_t *pgd_ref = pgd_offset_k(address);
+                        struct page *page;
+
+                        if (pgd_none(*pgd_ref))
+                                continue;
+                        spin_lock(&pgd_lock);
+                        for (page = pgd_list; page;
+                             page = (struct page *)page->index) {
+                                pgd_t *pgd;
+                                pgd = (pgd_t *)page_address(page) + pgd_index(address);
+                                if (pgd_none(*pgd))
+                                        set_pgd(pgd, *pgd_ref);
+                                else
+                                        BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+                        }
+                        spin_unlock(&pgd_lock);
+                        set_bit(pgd_index(address), insync);
+                }
+                if (address == start)
+                        start = address + PGDIR_SIZE;
+        }
+        /* Check that there is no need to do the same for the modules area. */
+        BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+        BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+                                (__START_KERNEL & PGDIR_MASK)));
+}
+
 static int __init enable_pagefaulttrace(char *str)
 {
         page_fault_trace = 1;
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h
index 08cad2482bcb..43d4c333a8b1 100644
--- a/include/asm-x86_64/pgalloc.h
+++ b/include/asm-x86_64/pgalloc.h
@@ -45,12 +45,39 @@ static inline void pud_free (pud_t *pud)
         free_page((unsigned long)pud);
 }
 
+static inline void pgd_list_add(pgd_t *pgd)
+{
+        struct page *page = virt_to_page(pgd);
+
+        spin_lock(&pgd_lock);
+        page->index = (pgoff_t)pgd_list;
+        if (pgd_list)
+                pgd_list->private = (unsigned long)&page->index;
+        pgd_list = page;
+        page->private = (unsigned long)&pgd_list;
+        spin_unlock(&pgd_lock);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+        struct page *next, **pprev, *page = virt_to_page(pgd);
+
+        spin_lock(&pgd_lock);
+        next = (struct page *)page->index;
+        pprev = (struct page **)page->private;
+        *pprev = next;
+        if (next)
+                next->private = (unsigned long)pprev;
+        spin_unlock(&pgd_lock);
+}
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
         unsigned boundary;
         pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
         if (!pgd)
                 return NULL;
+        pgd_list_add(pgd);
         /*
          * Copy kernel pointers in from init.
          * Could keep a freelist or slab cache of those because the kernel
@@ -67,6 +94,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 static inline void pgd_free(pgd_t *pgd)
 {
         BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+        pgd_list_del(pgd);
         free_page((unsigned long)pgd);
 }
 
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index def903287193..31e83c3bd022 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -420,6 +420,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define __pte_to_swp_entry(pte)         ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)           ((pte_t) { (x).val })
 
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+void vmalloc_sync_all(void);
+
 #endif /* !__ASSEMBLY__ */
 
 extern int kern_addr_valid(unsigned long addr);
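The pgd_list code added to pgalloc.h above hand-rolls a doubly linked list through struct page fields: page->index acts as the next pointer and page->private stores the address of the predecessor's next slot, which lets pgd_list_del() unlink in O(1) without walking the list or special-casing the head. Below is a minimal, self-contained sketch of the same pattern with plain struct fields standing in for the struct page members; all names here are illustrative, not from the patch.

        /* Sketch of the intrusive doubly linked list pattern used by
         * pgd_list_add()/pgd_list_del(). 'next' plays the role of
         * page->index, 'pprev' the role of page->private. */
        struct pgd_node {
                struct pgd_node *next;    /* next element, or NULL */
                struct pgd_node **pprev;  /* address of the slot pointing at us */
        };

        static struct pgd_node *node_list;  /* list head, like pgd_list */

        static void node_list_add(struct pgd_node *node)
        {
                node->next = node_list;
                if (node_list)
                        node_list->pprev = &node->next;
                node_list = node;
                node->pprev = &node_list;  /* head slot now points at us */
        }

        static void node_list_del(struct pgd_node *node)
        {
                *node->pprev = node->next;  /* works for head and interior */
                if (node->next)
                        node->next->pprev = node->pprev;
        }

Because every element records the slot that points at it, deletion needs no predecessor search, which is why pgd_free() can afford to call pgd_list_del() on every process teardown.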
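The vmalloc_sync_all() calls added in nmi.c and traps.c exist because the registered handlers typically live in module memory, which sits in the vmalloc area, and vmalloc page-table entries are otherwise filled into each process's PGD lazily via vmalloc_fault(). NMI handlers and die notifiers can run in contexts where taking such a fault is unsafe, so the mappings are propagated into every page directory eagerly at registration time. Below is a hedged sketch of a module relying on this, assuming the 2.6-era x86_64 set_nmi_callback()/nmi_callback_t interface implied by the nmi.c hunk; the handler body is illustrative, not from the patch.

        #include <linux/init.h>
        #include <linux/module.h>
        #include <asm/nmi.h>

        /* Runs in NMI context: a lazy vmalloc fault on this module's own
         * text or data could not be handled safely here, so its PGD
         * entries must already be present in every page directory. */
        static int my_nmi_handler(struct pt_regs *regs, int cpu)
        {
                return 0;  /* NMI not consumed; let default handling run */
        }

        static int __init my_module_init(void)
        {
                /* set_nmi_callback() now invokes vmalloc_sync_all() first,
                 * copying this module's vmalloc-area PGD entries into all
                 * page directories before the callback can ever fire. */
                set_nmi_callback(my_nmi_handler);
                return 0;
        }
        module_init(my_module_init);

The same reasoning covers register_die_notifier(): die notifiers are invoked from the page-fault path itself (the DIE_PAGE_FAULT notify_die() calls above), where recursing into vmalloc_fault() to reach the notifier's code would be fragile.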