From 3760dd6efa75c98e223643da2eb7040406433053 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] i386: Use CLFLUSH instead of WBINVD in change_page_attr CLFLUSH is a lot faster than WBINVD so try to use that. Signed-off-by: Andi Kleen --- arch/i386/mm/pageattr.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 8564b6ae17e..ad91528bdc1 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -67,11 +67,17 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } -static void flush_kernel_map(void *dummy) +static void flush_kernel_map(void *arg) { - /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ - if (boot_cpu_data.x86_model >= 4) + unsigned long adr = (unsigned long)arg; + + if (adr && cpu_has_clflush) { + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); + } else if (boot_cpu_data.x86_model >= 4) wbinvd(); + /* Flush all to work around Errata in early athlons regarding * large page flushing. */ @@ -173,9 +179,9 @@ __change_page_attr(struct page *page, pgprot_t prot) return 0; } -static inline void flush_map(void) +static inline void flush_map(void *adr) { - on_each_cpu(flush_kernel_map, NULL, 1, 1); + on_each_cpu(flush_kernel_map, adr, 1, 1); } /* @@ -217,9 +223,13 @@ void global_flush_tlb(void) spin_lock_irq(&cpa_lock); list_replace_init(&df_list, &l); spin_unlock_irq(&cpa_lock); - flush_map(); - list_for_each_entry_safe(pg, next, &l, lru) + if (!cpu_has_clflush) + flush_map(0); + list_for_each_entry_safe(pg, next, &l, lru) { + if (cpu_has_clflush) + flush_map(page_address(pg)); __free_page(pg); + } } #ifdef CONFIG_DEBUG_PAGEALLOC -- cgit v1.2.3 From 11a4180c0b03e2ee0c948fd8430ee092dc1625b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Use probe_kernel_address instead of __get_user in fault paths Makes the intention of the code cleaner to read and avoids a potential deadlock on mmap_sem. Also change the types of the arguments to not include __user because they're really not user addresses. Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 24 +++++++++++++----------- arch/i386/mm/fault.c | 12 ++++++------ 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 237f4884a1e..7b2f9f02208 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -380,7 +380,7 @@ void show_registers(struct pt_regs *regs) * time of the fault.. */ if (in_kernel) { - u8 __user *eip; + u8 *eip; int code_bytes = 64; unsigned char c; @@ -389,18 +389,20 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + eip = (u8 *)regs->eip - 43; + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { /* try starting at EIP */ - eip = (u8 __user *)regs->eip; + eip = (u8 *)regs->eip; code_bytes = 32; } for (i = 0; i < code_bytes; i++, eip++) { - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 *)regs->eip) printk("<%02x> ", c); else printk("%02x ", c); @@ -416,7 +418,7 @@ static void handle_BUG(struct pt_regs *regs) if (eip < PAGE_OFFSET) return; - if (probe_kernel_address((unsigned short __user *)eip, ud2)) + if (probe_kernel_address((unsigned short *)eip, ud2)) return; if (ud2 != 0x0b0f) return; @@ -429,11 +431,11 @@ static void handle_BUG(struct pt_regs *regs) char *file; char c; - if (probe_kernel_address((unsigned short __user *)(eip + 2), - line)) + if (probe_kernel_address((unsigned short *)(eip + 2), line)) break; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (probe_kernel_address((char **)(eip + 4), file) || + (unsigned long)file < PAGE_OFFSET || + probe_kernel_address(file, c)) file = ""; printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 2581575786c..aaaa4d225f7 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -167,7 +167,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, static int __is_prefetch(struct pt_regs *regs, unsigned long addr) { unsigned long limit; - unsigned long instr = get_segment_eip (regs, &limit); + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); int scan_more = 1; int prefetch = 0; int i; @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) unsigned char instr_hi; unsigned char instr_lo; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); -- cgit v1.2.3 From 3529833f1ca8790df06ce218b5d9d438776696ed Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] i386: substitute __va lookup with pfn_to_kaddr Substitutes allocate_pgdat virtual address lookup with pfn_to_kaddr macro. Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/mm/discontig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index ddbdb0336f2..103b76e56a9 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -168,7 +168,7 @@ static void __init allocate_pgdat(int nid) if (nid && node_has_online_mem(nid)) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); min_low_pfn += PFN_UP(sizeof(pg_data_t)); } } -- cgit v1.2.3 From da181a8b3916aa7f2e3c5775d2bd2fe3454cf82d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Add MMU virtualization to paravirt_ops Add the three bare TLB accessor functions to paravirt-ops. Most amusingly, flush_tlb is redefined on SMP, so I can't call the paravirt op flush_tlb. Instead, I chose to indicate the actual flush type, kernel (global) vs. user (non-global). Global in this sense means using the global bit in the page table entry, which makes TLB entries persistent across CR3 reloads, not global as in the SMP sense of invoking remote shootdowns, so the term is confusingly overloaded. AK: folded in fix from Zach for PAE compilation Signed-off-by: Zachary Amsden Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/paravirt.c | 109 ++++++++++++++++++++++++++++++++++++++ arch/i386/mm/boot_ioremap.c | 1 + include/asm-i386/paravirt.h | 75 ++++++++++++++++++++++++++ include/asm-i386/pgtable-2level.h | 5 +- include/asm-i386/pgtable-3level.h | 40 +++++++------- include/asm-i386/pgtable.h | 4 +- include/asm-i386/tlbflush.h | 18 +++++-- 7 files changed, 226 insertions(+), 26 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index fe82eb3adf4..3dceab5828f 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -31,6 +31,7 @@ #include #include #include +#include /* nop stub */ static void native_nop(void) @@ -379,6 +380,97 @@ static fastcall void native_io_delay(void) asm volatile("outb %al,$0x80"); } +static fastcall void native_flush_tlb(void) +{ + __native_flush_tlb(); +} + +/* + * Global pages have to be flushed a bit differently. Not a real + * performance problem because this does not happen often. + */ +static fastcall void native_flush_tlb_global(void) +{ + __native_flush_tlb_global(); +} + +static fastcall void native_flush_tlb_single(u32 addr) +{ + __native_flush_tlb_single(addr); +} + +#ifndef CONFIG_X86_PAE +static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; +} + +#else /* CONFIG_X86_PAE */ + +static fastcall void native_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + set_64bit((unsigned long long *)ptep,pte_val(pteval)); +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); +} + +static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) +{ + *pudp = pudval; +} + +static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; +} + +static fastcall void native_pmd_clear(pmd_t *pmd) +{ + u32 *tmp = (u32 *)pmd; + *tmp = 0; + smp_wmb(); + *(tmp + 1) = 0; +} +#endif /* CONFIG_X86_PAE */ + /* These are in entry.S */ extern fastcall void native_iret(void); extern fastcall void native_irq_enable_sysexit(void); @@ -454,6 +546,23 @@ struct paravirt_ops paravirt_ops = { .apic_read = native_apic_read, #endif + .flush_tlb_user = native_flush_tlb, + .flush_tlb_kernel = native_flush_tlb_global, + .flush_tlb_single = native_flush_tlb_single, + + .set_pte = native_set_pte, + .set_pte_at = native_set_pte_at, + .set_pmd = native_set_pmd, + .pte_update = (void *)native_nop, + .pte_update_defer = (void *)native_nop, +#ifdef CONFIG_X86_PAE + .set_pte_atomic = native_set_pte_atomic, + .set_pte_present = native_set_pte_present, + .set_pud = native_set_pud, + .pte_clear = native_pte_clear, + .pmd_clear = native_pmd_clear, +#endif + .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, }; diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 4de11f508c3..4de95a17a7d 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -16,6 +16,7 @@ */ #undef CONFIG_X86_PAE +#undef CONFIG_PARAVIRT #include #include #include diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e2c803fadb1..9f06265065f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -4,6 +4,7 @@ * para-virtualization: those hooks are defined here. */ #include #include +#include #ifdef CONFIG_PARAVIRT /* These are the most performance critical ops, so we want to be able to patch @@ -27,6 +28,7 @@ struct thread_struct; struct Xgt_desc_struct; struct tss_struct; +struct mm_struct; struct paravirt_ops { unsigned int kernel_rpl; @@ -121,6 +123,23 @@ struct paravirt_ops unsigned long (fastcall *apic_read)(unsigned long reg); #endif + void (fastcall *flush_tlb_user)(void); + void (fastcall *flush_tlb_kernel)(void); + void (fastcall *flush_tlb_single)(u32 addr); + + void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); +#ifdef CONFIG_X86_PAE + void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); + void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (fastcall *pmd_clear)(pmd_t *pmdp); +#endif + /* These two are jmp to, not actually called. */ void (fastcall *irq_enable_sysexit)(void); void (fastcall *iret)(void); @@ -297,6 +316,62 @@ static inline unsigned long apic_read(unsigned long reg) #endif +#define __flush_tlb() paravirt_ops.flush_tlb_user() +#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() +#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte(ptep, pteval); +} + +static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + paravirt_ops.set_pmd(pmdp, pmdval); +} + +static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update(mm, addr, ptep); +} + +static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update_defer(mm, addr, ptep); +} + +#ifdef CONFIG_X86_PAE +static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_atomic(ptep, pteval); +} + +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + paravirt_ops.set_pte_present(mm, addr, ptep, pte); +} + +static inline void set_pud(pud_t *pudp, pud_t pudval) +{ + paravirt_ops.set_pud(pudp, pudval); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + paravirt_ops.pte_clear(mm, addr, ptep); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + paravirt_ops.pmd_clear(pmdp); +} +#endif + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 8d8d3b9ecdb..04d6186abc2 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -13,11 +13,14 @@ * within a page table are directly modified. Thus, the following * hook is made available. */ +#ifndef CONFIG_PARAVIRT #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) #define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index c2d701ea35b..2a6e67db8bc 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -44,6 +44,7 @@ static inline int pte_exec_kernel(pte_t pte) return pte_x(pte); } +#ifndef CONFIG_PARAVIRT /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is @@ -80,25 +81,6 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte #define set_pud(pudptr,pudval) \ (*(pudptr) = (pudval)) -/* - * Pentium-II erratum A13: in PAE mode we explicitly have to flush - * the TLB via cr3 if the top-level pgd is changed... - * We do not let the generic code free and clear pgd entries due to - * this erratum. - */ -static inline void pud_clear (pud_t * pud) { } - -#define pud_page(pud) \ -((struct page *) __va(pud_val(pud) & PAGE_MASK)) - -#define pud_page_vaddr(pud) \ -((unsigned long) __va(pud_val(pud) & PAGE_MASK)) - - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ - pmd_index(address)) - /* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler @@ -118,6 +100,26 @@ static inline void pmd_clear(pmd_t *pmd) smp_wmb(); *(tmp + 1) = 0; } +#endif + +/* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + * We do not let the generic code free and clear pgd entries due to + * this erratum. + */ +static inline void pud_clear (pud_t * pud) { } + +#define pud_page(pud) \ +((struct page *) __va(pud_val(pud) & PAGE_MASK)) + +#define pud_page_vaddr(pud) \ +((unsigned long) __va(pud_val(pud) & PAGE_MASK)) + + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 7d398f493dd..efd7d90789d 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef _I386_BITOPS_H #include @@ -246,6 +247,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include #endif +#ifndef CONFIG_PARAVIRT /* * Rules for using pte_update - it must be called after any PTE update which * has not been done using the set_pte / clear_pte interfaces. It is used by @@ -261,7 +263,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - +#endif /* * We only update the dirty/accessed state if we set diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index 360648b0f2b..4dd82840d53 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -4,7 +4,15 @@ #include #include -#define __flush_tlb() \ +#ifdef CONFIG_PARAVIRT +#include +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +#define __native_flush_tlb() \ do { \ unsigned int tmpreg; \ \ @@ -19,7 +27,7 @@ * Global pages have to be flushed a bit differently. Not a real * performance problem because this does not happen often. */ -#define __flush_tlb_global() \ +#define __native_flush_tlb_global() \ do { \ unsigned int tmpreg, cr4, cr4_orig; \ \ @@ -36,6 +44,9 @@ : "memory"); \ } while (0) +#define __native_flush_tlb_single(addr) \ + __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") + # define __flush_tlb_all() \ do { \ if (cpu_has_pge) \ @@ -46,9 +57,6 @@ #define cpu_has_invlpg (boot_cpu_data.x86 > 3) -#define __flush_tlb_single(addr) \ - __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") - #ifdef CONFIG_X86_INVLPG # define __flush_tlb_one(addr) __flush_tlb_single(addr) #else -- cgit v1.2.3 From b0bfece40b1988aa8e3d910938691dce7859d82d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] i386: clear_fixmap() should not use set_pte() While not strictly required with the current code (as the upper half of page table entries generated by __set_fixmap() cannot be non-zero due to the second parameter of this function being 'unsigned long'), the use of set_pte() in __set_fixmap() in the context of clear_fixmap() is still improper with CONFIG_X86_PAE (see the respective comment in include/asm-i386/pgtable-3level.h) and would turn into a bug if that second parameter ever gets changed to a 64-bit type. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/mm/pgtable.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 10126e3f817..65b5c095903 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -95,8 +95,11 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) return; } pte = pte_offset_kernel(pmd, vaddr); - /* stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); + if (pgprot_val(flags)) + /* stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte(pfn, flags)); + else + pte_clear(&init_mm, vaddr, pte); /* * It's enough to flush this one mapping. -- cgit v1.2.3 From bf7e6a196318316e921f357557fca9d11d15f486 Mon Sep 17 00:00:00 2001 From: Artiom Myaskouvskey Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: Preserve EFI run time regions with memmap parameter When using memmap kernel parameter in EFI boot we should also add to memory map memory regions of runtime services to enable their mapping later. AK: merged and cleaned up the patch Signed-off-by: Artiom Myaskouvskey Signed-off-by: Andi Kleen --- arch/i386/kernel/e820.c | 60 +++++++++++++++++++++++++++++++++++-------------- arch/i386/mm/init.c | 2 -- include/linux/efi.h | 1 + 3 files changed, 44 insertions(+), 19 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index b704790f796..2f7d0a92fd7 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -742,29 +742,55 @@ void __init print_memory_map(char *who) } } -void __init limit_regions(unsigned long long size) +static __init __always_inline void efi_limit_regions(unsigned long long size) { unsigned long long current_addr = 0; + efi_memory_desc_t *md, *next_md; + void *p, *p1; + int i, j; + + j = 0; + p1 = memmap.map; + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; + next_md = p1; + current_addr = md->phys_addr + + PFN_PHYS(md->num_pages); + if (is_available_memory(md)) { + if (md->phys_addr >= size) continue; + memcpy(next_md, md, memmap.desc_size); + if (current_addr >= size) { + next_md->num_pages -= + PFN_UP(current_addr-size); + } + p1 += memmap.desc_size; + next_md = p1; + j++; + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == + EFI_MEMORY_RUNTIME) { + /* In order to make runtime services + * available we have to include runtime + * memory regions in memory map */ + memcpy(next_md, md, memmap.desc_size); + p1 += memmap.desc_size; + next_md = p1; + j++; + } + } + memmap.nr_map = j; + memmap.map_end = memmap.map + + (memmap.nr_map * memmap.desc_size); +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr; int i; print_memory_map("limit_regions start"); if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } + efi_limit_regions(size); + return; } for (i = 0; i < e820.nr_map; i++) { current_addr = e820.map[i].addr + e820.map[i].size; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 167416155ee..f4dd048187f 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -192,8 +192,6 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } -extern int is_available_memory(efi_memory_desc_t *); - int page_is_ram(unsigned long pagenr) { int i; diff --git a/include/linux/efi.h b/include/linux/efi.h index 91ecf49fbf2..df1c91855f0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -302,6 +302,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); +extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.3