From 6ffac1e90a17ea0aded5c581204397421eec91b6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 24 Jul 2008 18:07:56 -0700 Subject: x64, fpu: fix possible FPU leakage in error conditions On Thu, Jul 24, 2008 at 03:43:44PM -0700, Linus Torvalds wrote: > So how about this patch as a starting point? This is the RightThing(tm) to > do regardless, and if it then makes it easier to do some other cleanups, > we should do it first. What do you think? restore_fpu_checking() calls init_fpu() in error conditions. While this is wrong(as our main intention is to clear the fpu state of the thread), this was benign before commit 92d140e21f1 ("x86: fix taking DNA during 64bit sigreturn"). Post commit 92d140e21f1, live FPU registers may not belong to this process at this error scenario. In the error condition for restore_fpu_checking() (especially during the 64bit signal return), we are doing init_fpu(), which saves the live FPU register state (possibly belonging to some other process context) into the thread struct (through unlazy_fpu() in init_fpu()). This is wrong and can leak the FPU data. For the signal handler restore error condition in restore_i387(), clear the fpu state present in the thread struct(before ultimately sending a SIGSEGV for badframe). For the paranoid error condition check in math_state_restore(), send a SIGSEGV, if we fail to restore the state. Signed-off-by: Suresh Siddha Cc: Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 11 ++++++++++- arch/x86/kernel/traps_64.c | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd65..ca316b5b742 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; } /* diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..513caaca711 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - restore_fpu_checking(&me->thread.xstate->fxsave); + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + stts(); + force_sig(SIGSEGV, me); + return; + } task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } -- cgit v1.2.3 From e0d22d03c06c4e2c194d7010bc1e4a972199f156 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 31 Jul 2008 23:43:44 +0200 Subject: x86: fdiv bug detection fix The fdiv detection code writes s32 integer into the boot_cpu_data.fdiv_bug. However, the boot_cpu_data.fdiv_bug is only char (s8) field so the detection overwrites already set fields for other bugs, e.g. the f00f bug field. Use local s32 variable to receive result. This is a partial fix to Bugzilla #9928 - fixes wrong information about the f00f bug (tested) and probably for coma bug (I have no cpu to test this). Signed-off-by: Krzysztof Helt Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9b58a806e8..c8e315f1aa8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; */ static void __init check_fpu(void) { + s32 fdiv_bug; + if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); @@ -74,8 +76,10 @@ static void __init check_fpu(void) "fistpl %0\n\t" "fwait\n\t" "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) + : "=m" (*&fdiv_bug) : "m" (*&x), "m" (*&y)); + + boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) printk("Hmm, FPU with FDIV bug.\n"); } -- cgit v1.2.3 From 31343d8a5079cda57ffd539fcf4f00cea344fe98 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 8 Aug 2008 12:15:57 -0700 Subject: x86: Fix broken VMI in 2.6.27-rc.. The lowmem mapping table created by VMI need not depend on max_low_pfn at all. Instead we now create an extra large mapping which covers all possible lowmem instead of the physical ram that is actually available. This allows the vmi initialization to be done before max_low_pfn could be computed. We also move the vmi_init code very early in the boot process so that nobody accidentally breaks the fixmap dependancy. Signed-off-by: Alok N Kataria Acked-by: Zachary Amsden Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 16 ++++++++-------- arch/x86/kernel/vmi_32.c | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2d888586385..6e5823b9e76 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -604,6 +604,14 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); early_ioremap_init(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); screen_info = boot_params.screen_info; edid_info = boot_params.edid_info; @@ -817,14 +825,6 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be after max_low_pfn is determined, and before kernel - * pagetables are setup. - */ - vmi_init(); -#endif - paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); paravirt_pagetable_setup_done(swapper_pg_dir); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922..6ca515d6db5 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -37,6 +37,7 @@ #include #include #include +#include /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -683,7 +684,7 @@ void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ if (vmi_ops.set_linear_mapping) - vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); + vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); } /* -- cgit v1.2.3 From d388e5fdc461344d04307a3fa83862b9ed429647 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 Aug 2008 15:09:02 -0700 Subject: x86: Restore proper vector locking during cpu hotplug Having cpu_online_map change during assign_irq_vector can result in some really nasty and weird things happening. The one that bit me last time was accessing non existent per cpu memory for non existent cpus. This locking was removed in a sloppy x86_64 and x86_32 merge patch. Guys can we please try and avoid subtly breaking x86 when we are merging files together? Signed-off-by: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_apic_32.c | 6 +----- arch/x86/kernel/io_apic_64.c | 25 +++++++++++++++---------- arch/x86/kernel/smpboot.c | 12 +++++++++--- include/asm-x86/hw_irq.h | 12 ++++++++++-- 4 files changed, 35 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c..09cddb57bec 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -57,7 +57,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq) return vector; } -void setup_vector_irq(int cpu) -{ -} - static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d170..61a83b70c18 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -101,7 +101,7 @@ int timer_through_8259 __initdata; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + static int __assign_irq_vector(int irq, cpumask_t mask) { /* @@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -static void __setup_vector_irq(int cpu) +void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu) } } -void setup_vector_irq(int cpu) -{ - spin_lock(&vector_lock); - __setup_vector_irq(smp_processor_id()); - spin_unlock(&vector_lock); -} - - static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4..da10f07fc59 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused) * for which cpus receive the IPI. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). + * + * We need to hold vector_lock so there the set of online cpus + * does not change while we are assigning vectors to cpus. Holding + * this lock ensures we don't half assign or remove an irq from a cpu. */ ipi_call_lock_irq(); -#ifdef CONFIG_X86_IO_APIC - setup_vector_irq(smp_processor_id()); -#endif + lock_vector_lock(); + __setup_vector_irq(smp_processor_id()); cpu_set(smp_processor_id(), cpu_online_map); + unlock_vector_lock(); ipi_call_unlock_irq(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -1336,7 +1340,9 @@ int __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ + lock_vector_lock(); remove_cpu_from_maps(cpu); + unlock_vector_lock(); fixup_irqs(cpu_online_map); return 0; } diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51df566..edd0b95f14d 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -98,9 +98,17 @@ extern void (*const interrupt[NR_IRQS])(void); #else typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern spinlock_t vector_lock; #endif -extern void setup_vector_irq(int cpu); + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64) +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +extern void __setup_vector_irq(int cpu); +#else +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +static inline void __setup_vector_irq(int cpu) {} +#endif #endif /* !ASSEMBLY_ */ -- cgit v1.2.3 From eeb0d7d113895556db473ff1e638803d7d49bff9 Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 17:44:57 +0200 Subject: x86, debug: tone down arch/x86/kernel/mpparse.c debugging printk commit 11a62a056093a7f25f1595fbd8bd5f93559572b6 turns some formerly nopped debugging printks in arch/x86/kernel/mppparse.c into regular ones. The one at the top of smp_scan_config() in particular also prints on !CONFIG_SMP/CONFIG_X86_LOCAL_APIC kernels and UP machines without anything resembling MP tables which makes their lowly UP owners wonder... Turn the former Dprintk()s into apic_printk()s instead meaning that their printing is dependent on passing the apic=verbose (or =debug) command line param. On 32-bit, "apic" is a __setup() param which isn't early enough for this code and therefore needs a followup changing it into an early_param(). On 64-bit, it already is. Signed-off-by: Rene Herman Cc: Andrew Morton Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed..678090508a6 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) if (x86_quirks->mpc_oem_bus_info) x86_quirks->mpc_oem_bus_info(m, str); else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, @@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, @@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { -- cgit v1.2.3 From fb6bef8002d54a9c3062abc281893ec7c896d3ce Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 17:45:53 +0200 Subject: x86: make "apic" an early_param() on 32-bit On 32-bit, "apic" is a __setup() param meaning it is parsed rather late in the game. Make it an early_param() for apic_printk() use by arch/x86/kernel/mpparse.c. On 64-bit, it already is an early_param(). Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..f432d4833a1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1726,9 +1726,9 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", str) == 0) apic_verbosity = APIC_VERBOSE; - return 1; + return 0; } -__setup("apic=", apic_set_verbosity); +early_param("apic", apic_set_verbosity); static int __init lapic_insert_resource(void) { -- cgit v1.2.3 From 9b0094f7f289d752868aae1a3984517d712cbb22 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Aug 2008 15:14:55 -0700 Subject: x86, pci-calgary: fix function declaration Fix function declaration: linux-next-20080807/arch/x86/kernel/pci-calgary_64.c:1353:36: warning: non-ANSI function declaration of function 'get_tce_space_from_tar' Signed-off-by: Randy Dunlap Acked-by: Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b67a4b1d4ea..02d19328525 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) * Function for kdump case. Get the tce tables from first kernel * by reading the contents of the base adress register of calgary iommu */ -static void get_tce_space_from_tar() +static void get_tce_space_from_tar(void) { int bus; void __iomem *target; -- cgit v1.2.3 From 48d97cb65e62a5f1122ac2cf1149800d4f4693e8 Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Mon, 11 Aug 2008 19:20:17 +0200 Subject: x86: make "apic" an early_param() on 32-bit, NULL check Cyrill Gorcunov observed: > you turned it into early_param so now it's NULL injecting vulnerabled. > Could you please add checking for NULL str param? fix that. Also, change the name of 'str' into 'arg', to make it more apparent that this is an optional argument that can be NULL, not a string parameter that is empty when unset. Reported-by: Cyrill Gorcunov Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f432d4833a1..039a8d4aaf6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1720,12 +1720,16 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init apic_set_verbosity(char *str) +static int __init apic_set_verbosity(char *arg) { - if (strcmp("debug", str) == 0) + if (!arg) + return -EINVAL; + + if (strcmp(arg, "debug") == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) + else if (strcmp(arg, "verbose") == 0) apic_verbosity = APIC_VERBOSE; + return 0; } early_param("apic", apic_set_verbosity); -- cgit v1.2.3 From b74548e76a0eab1f29546e7c5a589429c069a680 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 11 Aug 2008 13:36:04 -0700 Subject: x86: fix 2.6.27rc1 cannot boot more than 8CPUs Jeff Chua reported that booting a !bigsmp kernel on a 16-way box hangs silently. this is a long-standing issue, smp start AP cpu could check the apic id >=8 etc before trying to start it. achieve this by moving the def_to_bigsmp check later and skip the apicid id > 8 [ mingo@elte.hu: clean up the message that is printed. ] Reported-by: "Jeff Chua" Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar arch/x86/kernel/setup.c | 6 ------ arch/x86/kernel/smpboot.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) --- arch/x86/kernel/setup.c | 6 ------ arch/x86/kernel/smpboot.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6e5823b9e76..68b48e3fbcb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -861,12 +861,6 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) - if (def_to_bigsmp) - printk(KERN_WARNING "More than 8 CPUs detected and " - "CONFIG_X86_PC cannot handle it.\nUse " - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); -#endif kvm_guest_init(); e820_reserve_resources(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index da10f07fc59..91055d7fc1b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -994,7 +994,17 @@ int __cpuinit native_cpu_up(unsigned int cpu) flush_tlb_all(); low_mappings = 1; +#ifdef CONFIG_X86_PC + if (def_to_bigsmp && apicid > 8) { + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + err = -1; + } else + err = do_boot_cpu(apicid, cpu); +#else err = do_boot_cpu(apicid, cpu); +#endif zap_low_mappings(); low_mappings = 0; -- cgit v1.2.3 From c9d08f0860d47ed6a3fe91d0f19335086179be7b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Aug 2008 23:23:03 +0200 Subject: x86: fix 2 section mismatch warnings - map_high() WARNING: vmlinux.o(.text+0x14cf8): Section mismatch in reference from the function map_high() to the function .init.text:init_extra_mapping_uc() The function map_high() references the function __init init_extra_mapping_uc(). This is often because map_high lacks a __init annotation or the annotation of init_extra_mapping_uc is wrong. WARNING: vmlinux.o(.text+0x14d05): Section mismatch in reference from the function map_high() to the function .init.text:init_extra_mapping_wb() The function map_high() references the function __init init_extra_mapping_wb(). This is often because map_high lacks a __init annotation or the annotation of init_extra_mapping_wb is wrong. map_high is called only from __init functions (map_*_high) and calls 2 __init_functions (init_extra_mapping_*) Signed-off-by: Marcin Slusarz Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2cfcbded888..2d7e307c777 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -222,7 +222,7 @@ static __init void map_low_mmrs(void) enum map_type {map_wb, map_uc}; -static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) { unsigned long bytes, paddr; -- cgit v1.2.3 From 6b3560229d3b6be7443fa9f9c6502e660bcfef5f Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 12 Aug 2008 23:23:05 +0200 Subject: x86: fix 2 section mismatch warnings - find_and_reserve_crashkernel WARNING: vmlinux.o(.text+0xcd1f): Section mismatch in reference from the function find_and_reserve_crashkernel() to the function .init.text:find_e820_area() The function find_and_reserve_crashkernel() references the function __init find_e820_area(). This is often because find_and_reserve_crashkernel lacks a __init annotation or the annotation of find_e820_area is wrong. WARNING: vmlinux.o(.text+0xcd38): Section mismatch in reference from the function find_and_reserve_crashkernel() to the function .init.text:reserve_bootmem_generic() The function find_and_reserve_crashkernel() references the function __init reserve_bootmem_generic(). This is often because find_and_reserve_crashkernel lacks a __init annotation or the annotation of reserve_bootmem_generic is wrong. find_and_reserve_crashkernel is called from __init function (reserve_crashkernel) and calls 2 __init functions (find_e820_area, reserve_bootmem_generic), so mark it __init Signed-off-by: Marcin Slusarz Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3fbcb..a4656adab53 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void) * @size: Size of the crashkernel memory to reserve. * Returns the base address on success, and -1ULL on failure. */ -unsigned long long find_and_reserve_crashkernel(unsigned long long size) +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) { const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long start = 0LL; -- cgit v1.2.3 From a726c6009e6eba4acfccf8b683854866eeabb184 Mon Sep 17 00:00:00 2001 From: John Keller Date: Tue, 29 Jul 2008 14:34:16 -0500 Subject: x86: allow MMCONFIG above 4GB on x86_64 SGI UV will have MMCFG base addresses that are greater than 4GB (32 bits). v2: Use CONFIG_RESOURCES_64BIT instead of CONFIG_X86_64. v3: Create a flag, that is set by platform specific code, to disable the > 4GB check. Signed-off-by: John Keller Cc: jpk@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d7129..bfd10fd211c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +{ + if (!strcmp(mcfg->header.oem_id, "SGI")) + acpi_mcfg_64bit_base_addr = TRUE; + + return 0; +} + int __init acpi_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; @@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + + acpi_mcfg_oem_check(mcfg); + for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && + !acpi_mcfg_64bit_base_addr) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); kfree(pci_mmcfg_config); -- cgit v1.2.3 From 23b49c19f6946cc33392a1fc75dd788dd4a90fb7 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Mon, 11 Aug 2008 14:55:31 -0700 Subject: x86: resurrect proper handling of maxcpus= kernel option (v2) For some reason we had two parsers registered for maxcpus=. One in init/main.c and another in arch/x86/smpboot.c. So I nuked the one in arch/x86. Also 64-bit kernels used to handle maxcpus= as documented in Documentation/cpu-hotplug.txt. CPUs with 'id > maxcpus' are initialized but not booted. 32-bit version for some reason ignored them even though all the infrastructure for booting them later is there. In the current mainline both 64 and 32 bit versions are broken. This patch restores the correct behaviour. I've tested x86_64 version on 4- and 8- way Core2 and 2-way Opteron based machines. Various config combinations SMP, !SMP, CPU_HOTPLUG, !CPU_HOTPLUG. Booted with maxcpus=1 and maxcpus=4, etc. Everything is working as expected. So far we've received two reports from different people confirming that 32-bit version also works fine, both on dual core laptops and 16way server machines. [v2: This version fixes visws breakage pointed out by Ingo.] Signed-off-by: Max Krasnyansky Cc: lizf@cn.fujitsu.com Cc: jeff.chua.linux@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 -------- arch/x86/kernel/apic_64.c | 7 ------- arch/x86/kernel/smpboot.c | 14 -------------- arch/x86/kernel/visws_quirks.c | 6 ++---- 4 files changed, 2 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 039a8d4aaf6..f88bd0d982b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1454,8 +1454,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; @@ -1482,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..446c062e831 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -90,7 +90,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ @@ -1062,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 91055d7fc1b..e25287e4a85 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1386,17 +1386,3 @@ void __cpu_die(unsigned int cpu) BUG(); } #endif - -/* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. - */ -static int __init parse_maxcpus(char *arg) -{ - extern unsigned int maxcpus; - - if (arg) - maxcpus = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c4..594ef47f0a6 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early) return 1; } -extern unsigned int __cpuinitdata maxcpus; - /* * The Visual Workstation is Intel MP compliant in the hardware * sense, but it doesn't have a BIOS(-configuration table). @@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve) ncpus = CO_CPU_MAX; } - if (ncpus > maxcpus) - ncpus = maxcpus; + if (ncpus > setup_max_cpus) + ncpus = setup_max_cpus; #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; -- cgit v1.2.3 From a58f03b07539f6575adaa011712fa139c9343742 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 14 Aug 2008 02:16:30 -0700 Subject: x86: check bigsmp in smp_sanity_check instead of cpu_up clear bits for cpu nr > 8. This allows us to boot the full range of possible CPUs that the supported APIC model will allow. Previously we'd hang or boot up with less than 8 CPUs. Signed-off-by: Yinghai Lu Tested-by: Jeff Chua Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e25287e4a85..a8fb8a980fa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -994,17 +994,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) flush_tlb_all(); low_mappings = 1; -#ifdef CONFIG_X86_PC - if (def_to_bigsmp && apicid > 8) { - printk(KERN_WARNING - "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); - err = -1; - } else - err = do_boot_cpu(apicid, cpu); -#else err = do_boot_cpu(apicid, cpu); -#endif zap_low_mappings(); low_mappings = 0; @@ -1058,6 +1048,34 @@ static __init void disable_smp(void) static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); + +#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) + if (def_to_bigsmp && nr_cpu_ids > 8) { + unsigned int cpu; + unsigned nr; + + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + + nr = 0; + for_each_present_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_present_map); + nr++; + } + + nr = 0; + for_each_possible_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_possible_map); + nr++; + } + + nr_cpu_ids = 8; + } +#endif + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk(KERN_WARNING "weird, boot CPU (#%d) not listed" "by the BIOS.\n", hard_smp_processor_id()); -- cgit v1.2.3 From a6825f1c1fa83b1e92b6715ee5771a4d6524d3b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Aug 2008 12:17:06 +0200 Subject: x86: hpet: workaround SB700 BIOS AMD SB700 based systems with spread spectrum enabled use a SMM based HPET emulation to provide proper frequency setting. The SMM code is initialized with the first HPET register access and takes some time to complete. During this time the config register reads 0xffffffff. We check for max. 1000 loops whether the config register reads a non 0xffffffff value to make sure that HPET is up and running before we go further. A counting loop is safe, as the HPET access takes thousands of CPU cycles. On non SB700 based machines this check is only done once and has no side effects. Based on a quirk patch from: crane cai Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334..59fd3b6b130 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -359,6 +359,7 @@ static int hpet_clocksource_register(void) int __init hpet_enable(void) { unsigned long id; + int i; if (!is_hpet_capable()) return 0; @@ -369,6 +370,29 @@ int __init hpet_enable(void) * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); + + /* + * AMD SB700 based systems with spread spectrum enabled use a + * SMM based HPET emulation to provide proper frequency + * setting. The SMM code is initialized with the first HPET + * register access and takes some time to complete. During + * this time the config register reads 0xffffffff. We check + * for max. 1000 loops whether the config register reads a non + * 0xffffffff value to make sure that HPET is up and running + * before we go further. A counting loop is safe, as the HPET + * access takes thousands of CPU cycles. On non SB700 based + * machines this check is only done once and has no side + * effects. + */ + for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { + if (i == 1000) { + printk(KERN_WARNING + "HPET config register value = 0xFFFFFFFF. " + "Disabling HPET\n"); + goto out_nohpet; + } + } + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; -- cgit v1.2.3 From 967060d00d7ab8e992963a966cd3d18156c02d55 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Aug 2008 15:43:33 -0700 Subject: x86, msr: fix NULL pointer deref due to msr_open on nonexistent CPUs msr_open tests for someone trying to open a device for a nonexistent CPU. However, the function always returns 0, not ret like it should, hence userspace can BUG the kernel trivially. This bug was introduced by the cdev lock_kernel pushdown patch last May. The BUG can be reproduced with these commands: # mknod fubar c 202 8 <-- pick a number less than NR_CPUS that is not the number of an online CPU # cat fubar Signed-off-by: Darrick J. Wong Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9fd80955244..e4393808688 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file) ret = -EIO; /* MSR not supported */ out: unlock_kernel(); - return 0; + return ret; } /* -- cgit v1.2.3 From 519c31bacf78a969efa8d2e55ed8862848f28590 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:15 +0200 Subject: x86, AMD IOMMU: use status bit instead of memory write-back for completion wait Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 17 ++++++++++------- include/asm-x86/amd_iommu_types.h | 4 ++++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 22d7d050905..028e945c68a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -101,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret; + int ret, ready = 0; + unsigned status = 0; struct iommu_cmd cmd; - volatile u64 ready = 0; - unsigned long ready_phys = virt_to_phys(&ready); unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = upper_32_bits(ready_phys); - cmd.data[2] = 1; /* value written to 'ready' */ + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); iommu->need_sync = 0; @@ -122,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu) while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; - cpu_relax(); + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; } + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 22aa58ca199..32543229db7 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -69,6 +69,9 @@ #define MMIO_EVT_TAIL_OFFSET 0x2018 #define MMIO_STATUS_OFFSET 0x2020 +/* MMIO status bits */ +#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 + /* feature control bits */ #define CONTROL_IOMMU_EN 0x00ULL #define CONTROL_HT_TUN_EN 0x01ULL @@ -89,6 +92,7 @@ #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_COMPL_WAIT_INT_MASK 0x02 #define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 #define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 -- cgit v1.2.3 From 9f5f5fb35d2934fe7dc0cb019854a030efd10cd7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:16 +0200 Subject: x86, AMD IOMMU: initialize device table properly This patch adds device table initializations which forbids memory accesses for devices per default and disables all page faults. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 18 ++++++++++++++++++ include/asm-x86/amd_iommu_types.h | 1 + 2 files changed, 19 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index d9a9da597e7..ceba3381153 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -800,6 +800,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table) return 0; } +/* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table(void) +{ + u16 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -931,6 +946,9 @@ int __init amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto free; + /* init the device table */ + init_device_table(); + /* * let all alias entries point to itself */ diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 32543229db7..f0beca73e36 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -103,6 +103,7 @@ #define DEV_ENTRY_TRANSLATION 0x01 #define DEV_ENTRY_IR 0x3d #define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_NO_PAGE_FAULT 0x62 #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 -- cgit v1.2.3 From 8a456695c5020d6317f9c7af190999e9414b0d3e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:17 +0200 Subject: x86m AMD IOMMU: cleanup: replace LOW_U32 macro with generic lower_32_bits Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- include/asm-x86/amd_iommu_types.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 028e945c68a..de39e1f2ede 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, address &= PAGE_MASK; CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; - cmd.data[2] = LOW_U32(address); + cmd.data[2] = lower_32_bits(address); cmd.data[3] = upper_32_bits(address); if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index f0beca73e36..dcc81206739 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -31,9 +31,6 @@ #define ALIAS_TABLE_ENTRY_SIZE 2 #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) -/* helper macros */ -#define LOW_U32(x) ((x) & ((1ULL << 32)-1)) - /* Length of the MMIO region for the AMD IOMMU */ #define MMIO_REGION_LENGTH 0x4000 -- cgit v1.2.3 From 129d6aba444d1e99d4cbfb9866a4652912426b65 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Aug 2008 19:55:18 +0200 Subject: x86, AMD IOMMU: initialize dma_ops after sysfs registration If sysfs registration fails all memory used by IOMMU is freed. This happens after dma_ops initialization and the functions will access the freed memory then. Fix this by initializing dma_ops after the sysfs registration. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index ceba3381153..a69cc0f5204 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -972,15 +972,15 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; - ret = amd_iommu_init_dma_ops(); + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; - ret = sysdev_class_register(&amd_iommu_sysdev_class); + ret = sysdev_register(&device_amd_iommu); if (ret) goto free; - ret = sysdev_register(&device_amd_iommu); + ret = amd_iommu_init_dma_ops(); if (ret) goto free; -- cgit v1.2.3 From dcc984166870150709f0c645b521a47becd9a047 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 14 Aug 2008 16:32:15 -0400 Subject: x86, perfctr: don't use CCCR_OVF_PMI1 on Pentium 4Ds Currently, setup_p4_watchdog() use CCCR_OVF_PMI1 to enable the counter overflow interrupts to the second logical core. But this bit doesn't work on Pentium 4 Ds (model 4, stepping 4) and this patch avoids its use on these processors. Tested on 4 different machines that have this specific model with success. Signed-off-by: Aristeu Rozanski Cc: jvillalovos@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index de7439f82b9..05cc22dbd4f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P4_IQ_PERFCTR1; evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); } evntsel = P4_ESCR_EVENT_SELECT(0x3F) -- cgit v1.2.3 From 394a15051c33f2b18e72f42283b36a9388fa414b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 14 Aug 2008 09:11:26 -0500 Subject: x86: invalidate caches before going into suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU core is shut down, all of its caches need to be flushed to prevent stale data from causing errors if the core is resumed. Current Linux suspend code performs an assignment after the flush, which can add dirty data back to the cache.  On some AMD platforms, additional speculative reads have caused crashes on resume because of this dirty data. Relocate the cache flush to be the very last thing done before halting.  Tie into an assembly line so the compile will not reorder it.  Add some documentation explaining what is going on and why we're doing this. Signed-off-by: Mark Langsdorf Acked-by: Mark Borden Acked-by: Michael Hohmuth Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 5 ++--- arch/x86/kernel/process_64.c | 5 ++--- include/asm-x86/processor.h | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 53bc653ed5c..3b7a1ddcc0b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -95,7 +95,6 @@ static inline void play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; @@ -104,8 +103,8 @@ static inline void play_dead(void) * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7d9a1..71553b664e2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 5f58da401b4..4df3e2f6fb5 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -728,6 +728,29 @@ extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; extern unsigned long idle_nomwait; +/* + * on systems with caches, caches must be flashed as the absolute + * last instruction before going into a suspended halt. Otherwise, + * dirty data can linger in the cache and become stale on resume, + * leading to strange errors. + * + * perform a variety of operations to guarantee that the compiler + * will not reorder instructions. wbinvd itself is serializing + * so the processor will not reorder. + * + * Systems without cache can just go into halt. + */ +static inline void wbinvd_halt(void) +{ + mb(); + /* check for clflush to determine if wbinvd is legal */ + if (cpu_has_clflush) + asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); + else + while (1) + halt(); +} + extern void enable_sep_cpu(void); extern int sysenter_setup(void); -- cgit v1.2.3 From 15636668449d4135ac77a79715ba430a81aed16d Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Fri, 15 Aug 2008 08:36:14 -0400 Subject: x86, NMI: fix watchdog failure message > it just won't work at boot time - the second logic unit will be stuck: > > Booting processor 1/2 APIC 0x1 > Initializing CPU#1 > Calibrating delay using timer specific routine.. 5586.12 BogoMIPS (lpj=2793063) > CPU: Trace cache: 12K uops, L1 D cache: 16K > CPU: L2 cache: 1024K > CPU: Physical Processor ID: 0 > CPU: Processor Core ID: 1 > CPU1: Thermal monitoring enabled (TM1) > Intel(R) Pentium(R) D CPU 2.80GHz stepping 04 > Brought up 2 CPUs > testing NMI watchdog ... <4>WARNING: CPU#1: NMI appears to be stuck (0->0)! while at it... - fix that newline Signed-off-by: Aristeu Rozanski Cc: jvillalo@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac6d51222e7..919473ad4a2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -142,11 +142,15 @@ int __init check_nmi_watchdog(void) if (!per_cpu(wd_enabled, cpu)) continue; if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + printk("\n"); printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + printk(KERN_WARNING "Please report this to " + "linux-kernel@vger.kernel.org and attach " + "the output of 'dmesg' command.\n"); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } -- cgit v1.2.3 From 8bb851900f5d0a79d3fddac808cc670d9894ef67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Aug 2008 15:34:32 +0200 Subject: x86, nmi: clean UP NMI watchdog failure message clean up the failure message - and redirect people to bugzilla instead of lkml. Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 919473ad4a2..abb78a2cc4a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) } #endif +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); +} + int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; @@ -141,19 +158,8 @@ int __init check_nmi_watchdog(void) for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("\n"); - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - printk(KERN_WARNING "Please report this to " - "linux-kernel@vger.kernel.org and attach " - "the output of 'dmesg' command.\n"); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); } endflag = 1; if (!atomic_read(&nmi_active)) { -- cgit v1.2.3 From 9744f5a32853642f8ed0749a1c9ed8cf9c9c9dc4 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 3 Aug 2008 19:25:48 +0200 Subject: x86, acpi: cleanup, temp_stack is used only when CONFIG_SMP is set fix: arch/x86/kernel/acpi/sleep.c:24: warning: 'temp_stack' defined but not used [ Sven Wegener : fix build bug ] Signed-off-by: Marcin Slusarz Acked-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fa2161d5003..81e5ab6542d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; /* address in low memory of the wakeup routine. */ static unsigned long acpi_realmode; -#ifdef CONFIG_64BIT +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) static char temp_stack[10240]; #endif -- cgit v1.2.3 From 0d5cdc97e242a5589e5dca23277675f4b4482490 Mon Sep 17 00:00:00 2001 From: Jens Rottmann Date: Mon, 4 Aug 2008 14:40:16 +0200 Subject: x86, geode-mfgpt: check IRQ before using MFGPT as clocksource Adds a simple IRQ autodetection to the AMD Geode MFGPT driver, and more importantly, adds some checks, if IRQs can actually be received on the chosen line. This fixes cases where MFGPT is selected as clocksource though not producing any ticks, so the kernel simply starves during boot. Signed-off-by: Jens Rottmann Cc: Andres Salomon Cc: linux-geode@bombadil.infradead.org Cc: Jordan Crouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/mfgpt_32.c | 52 +++++++++++++++++++++++++++++++++------------- include/asm-x86/geode.h | 3 ++- 2 files changed, 39 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f48..3b599518c32 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -33,6 +33,8 @@ #include #include +#define MFGPT_DEFAULT_IRQ 7 + static struct mfgpt_timer_t { unsigned int avail:1; } mfgpt_timers[MFGPT_MAX_TIMERS]; @@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) } EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); -int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) +int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) { - u32 val, dummy; - int offset; + u32 zsel, lpc, dummy; + int shift; if (timer < 0 || timer >= MFGPT_MAX_TIMERS) return -EIO; - if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) return -EIO; - rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = MFGPT_DEFAULT_IRQ; - offset = (timer % 4) * 4; - - val &= ~((0xF << offset) | (0xF << (offset + 16))); + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + /* All chosen and checked - go for it */ + if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; if (enable) { - val |= (irq & 0x0F) << (offset); - val |= (irq & 0x0F) << (offset + 16); + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); } - wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); return 0; } @@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; static u16 mfgpt_event_clock; -static int irq = 7; +static int irq; static int __init mfgpt_setup(char *str) { get_option(&str, &irq); @@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) mfgpt_event_clock = timer; /* Set up the IRQ on the MFGPT side */ - if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { + if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); return -EIO; } @@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) &mfgpt_clockevent); printk(KERN_INFO - "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); + "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", + timer, irq); clockevents_register_device(&mfgpt_clockevent); return 0; err: - geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); + geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); printk(KERN_ERR "mfgpt-timer: Unable to set up the MFGPT clock source\n"); return -EIO; diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index bb06027fc83..2c1cda0b8a8 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -50,6 +50,7 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_PIC_YSEL_HIGH 0x51400021 #define MSR_PIC_ZSEL_LOW 0x51400022 #define MSR_PIC_ZSEL_HIGH 0x51400023 +#define MSR_PIC_IRQM_LPC 0x51400025 #define MSR_MFGPT_IRQ 0x51400028 #define MSR_MFGPT_NR 0x51400029 @@ -237,7 +238,7 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg) } extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); -extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable); +extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable); extern int geode_mfgpt_alloc_timer(int timer, int domain); #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) -- cgit v1.2.3 From 66d4bdf22b8652cda215e2653c8bbec7a767ed57 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 31 Jul 2008 16:48:31 +0100 Subject: x86-64: fix overlap of modules and fixmap areas Plus add a build time check so this doesn't go unnoticed again. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 1 + include/asm-x86/pgtable_64.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1b318e903bf..9bfc4d72fb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index ac5fff4cc58..549144d03d9 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -151,7 +151,7 @@ static inline void native_pgd_clear(pgd_t *pgd) #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) #define VMEMMAP_START _AC(0xffffe20000000000, UL) #define MODULES_VADDR _AC(0xffffffffa0000000, UL) -#define MODULES_END _AC(0xfffffffffff00000, UL) +#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 163f6876f5c3ff8215e900b93779e960a56b3694 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:22 -0700 Subject: kexec jump: rename KEXEC_CONTROL_CODE_SIZE to KEXEC_CONTROL_PAGE_SIZE Rename KEXEC_CONTROL_CODE_SIZE to KEXEC_CONTROL_PAGE_SIZE, because control page is used for not only code on some platform. For example in kexec jump, it is used for data and stack too. [akpm@linux-foundation.org: unbreak powerpc and arm, finish conversion] Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Cc: Vivek Goyal Cc: Ingo Molnar Cc: Russell King Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/kexec.h | 2 +- arch/arm/kernel/machine_kexec.c | 2 +- arch/ia64/include/asm/kexec.h | 2 +- arch/powerpc/include/asm/kexec.h | 2 +- arch/powerpc/kernel/machine_kexec_32.c | 2 +- arch/s390/include/asm/kexec.h | 2 +- arch/sh/include/asm/kexec.h | 2 +- arch/x86/kernel/machine_kexec_32.c | 2 +- include/asm-mips/kexec.h | 2 +- include/asm-x86/kexec.h | 4 ++-- include/linux/kexec.h | 4 ++-- kernel/kexec.c | 6 +++--- 12 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index c8986bb99ed..df15a0dc228 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -10,7 +10,7 @@ /* Maximum address we can use for the control code buffer */ #define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) -#define KEXEC_CONTROL_CODE_SIZE 4096 +#define KEXEC_CONTROL_PAGE_SIZE 4096 #define KEXEC_ARCH KEXEC_ARCH_ARM diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index db8f54a3451..fae5beb3c3d 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -71,7 +71,7 @@ void machine_kexec(struct kimage *image) flush_icache_range((unsigned long) reboot_code_buffer, - (unsigned long) reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); + (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); cpu_proc_fin(); diff --git a/arch/ia64/include/asm/kexec.h b/arch/ia64/include/asm/kexec.h index 541be835fc5..e1d58f819d7 100644 --- a/arch/ia64/include/asm/kexec.h +++ b/arch/ia64/include/asm/kexec.h @@ -9,7 +9,7 @@ /* Maximum address we can use for the control code buffer */ #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096) +#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096) /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_IA_64 diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index acdcdc66f1b..3736d9b3328 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -22,7 +22,7 @@ #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE #endif -#define KEXEC_CONTROL_CODE_SIZE 4096 +#define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index cbaa3419679..ae63a964b85 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -51,7 +51,7 @@ void default_machine_kexec(struct kimage *image) relocate_new_kernel_size); flush_icache_range(reboot_code_buffer, - reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); + reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); /* now call it */ diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index f219c6411e0..bb729b84a21 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -31,7 +31,7 @@ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) /* Allocate one page for the pdp and the second for the code */ -#define KEXEC_CONTROL_CODE_SIZE 4096 +#define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 00f4260ef09..765a5e1660f 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -21,7 +21,7 @@ /* Maximum address we can use for the control code buffer */ #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -#define KEXEC_CONTROL_CODE_SIZE 4096 +#define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 9fe478d9840..466450167de 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -78,7 +78,7 @@ static void load_segments(void) /* * A architecture hook called to validate the * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE * have been allocated, but the segments have yet * been copied into the kernel. * diff --git a/include/asm-mips/kexec.h b/include/asm-mips/kexec.h index cdbab43b7d3..4314892aaeb 100644 --- a/include/asm-mips/kexec.h +++ b/include/asm-mips/kexec.h @@ -16,7 +16,7 @@ /* Maximum address we can use for the control code buffer */ #define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000) -#define KEXEC_CONTROL_CODE_SIZE 4096 +#define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_MIPS diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index c0e52a14fd4..f6fb3d21883 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -63,7 +63,7 @@ /* Maximum address we can use for the control code buffer */ # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -# define KEXEC_CONTROL_CODE_SIZE 4096 +# define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 @@ -79,7 +79,7 @@ # define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) /* Allocate one page for the pdp and the second for the code */ -# define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL) +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_X86_64 diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 32110cede64..17f76fc0517 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -25,8 +25,8 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif -#ifndef KEXEC_CONTROL_CODE_SIZE -#error KEXEC_CONTROL_CODE_SIZE not defined +#ifndef KEXEC_CONTROL_PAGE_SIZE +#error KEXEC_CONTROL_PAGE_SIZE not defined #endif #ifndef KEXEC_ARCH diff --git a/kernel/kexec.c b/kernel/kexec.c index bfbbd120623..2810558802b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -77,7 +77,7 @@ int kexec_should_crash(struct task_struct *p) * * The code for the transition from the current kernel to the * the new kernel is placed in the control_code_buffer, whose size - * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single + * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single * page of memory is necessary, but some architectures require more. * Because this memory must be identity mapped in the transition from * virtual to physical addresses it must live in the range @@ -242,7 +242,7 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; @@ -317,7 +317,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; -- cgit v1.2.3 From fb45daa69d287b394eca1619b3fadff7c0215c71 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:23 -0700 Subject: kexec jump: check code size in control page Kexec/Kexec-jump require code size in control page is less than PAGE_SIZE/2. This patch add link-time checking for this. ASSERT() of ld link script is used as the link-time checking mechanism. [akpm@linux-foundation.org: build fix] Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Acked-by: Vivek Goyal Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 2 +- arch/x86/kernel/relocate_kernel_32.S | 10 +++++++--- arch/x86/kernel/vmlinux_32.lds.S | 8 ++++++++ include/asm-x86/kexec.h | 4 ++++ 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 466450167de..5c8e7735c89 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -138,7 +138,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE/2); + memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 703310a9902..6f50664b2ba 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,10 +20,11 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) -/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are - * used to save some data for jumping back +/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back */ -#define DATA(offset) (PAGE_SIZE/2+(offset)) +#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) /* Minimal CPU state */ #define ESP DATA(0x0) @@ -376,3 +377,6 @@ swap_pages: popl %ebx popl %ebp ret + + .globl kexec_control_code_size +.set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d..af5bdad8460 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -209,3 +209,11 @@ SECTIONS DWARF_DEBUG } + +#ifdef CONFIG_KEXEC +/* Link time checks */ +#include + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index f6fb3d21883..4246ab7dc98 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -41,6 +41,10 @@ # define PAGES_NR 17 #endif +#ifdef CONFIG_X86_32 +# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +#endif + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From 3122c331190e9d1622bf1c8cf6ce3b17cca67c9e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 15 Aug 2008 00:40:26 -0700 Subject: kexec jump: fix for ftrace Ftrace depends on some processor state that we destroyed during kexec and restored by restore_processor_state(). So save_processor_state() and restore_processor_state() are moved into machine_kexec() and ftrace is restored after restore_processor_state(). Signed-off-by: Huang Ying Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Eric W. Biederman" Cc: Vivek Goyal Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_32.c | 16 +++++++++++++++- kernel/kexec.c | 2 -- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5c8e7735c89..0732adba05c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + int save_ftrace_enabled; asmlinkage unsigned long (*relocate_kernel_ptr)(unsigned long indirection_page, unsigned long control_page, @@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image) unsigned int has_pae, unsigned int preserve_context); - tracer_disable(); +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + save_processor_state(); +#endif + + save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); @@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image) (unsigned long)page_list, image->start, cpu_has_pae, image->preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + restore_processor_state(); +#endif + + __ftrace_enabled_restore(save_ftrace_enabled); } void arch_crash_save_vmcoreinfo(void) diff --git a/kernel/kexec.c b/kernel/kexec.c index 17c80fdc453..9fc6f7cbd8a 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1469,7 +1469,6 @@ int kernel_kexec(void) error = device_power_down(PMSG_FREEZE); if (error) goto Enable_irqs; - save_processor_state(); } else #endif { @@ -1482,7 +1481,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - restore_processor_state(); device_power_up(PMSG_RESTORE); Enable_irqs: local_irq_enable(); -- cgit v1.2.3 From fc0091b3c86396afc8e6c273aff21671cf882ee1 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 15 Aug 2008 17:21:14 +0100 Subject: x86: change init_gdt to update the gdt via write_gdt, rather than a direct write. By writing directly, a memory access violation can occur whilst hotplugging a CPU if the entry was previously marked read-only. Signed-off-by: Alex Nixon Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpcommon.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca..397e309839d 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -8,18 +8,21 @@ DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. */ +/* + * Initialize the CPU's GDT. This is either the boot CPU doing itself + * (still using the master per-cpu area), or a CPU doing it for a + * secondary which will soon come up. + */ __cpuinit void init_gdt(int cpu) { - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct gdt; - pack_descriptor(&gdt[GDT_ENTRY_PERCPU], - __per_cpu_offset[cpu], 0xFFFFF, + pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; - gdt[GDT_ENTRY_PERCPU].s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; -- cgit v1.2.3 From d554d9a4295dd0595d12eeccbc55d1f495b15176 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..46af7167673 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3 From 67d0c9ebdc9f5f356657146b4e862b2d745e9e78 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:12:37 +0200 Subject: x86: fix MP_processor_info section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1fe7): Section mismatch in reference from the function MP_processor_info() to the variable .init.data:x86_quirks The function __cpuinit MP_processor_info() references a variable __initdata x86_quirks. If x86_quirks is only used by MP_processor_info then annotate x86_quirks with a matching annotation. MP_processor_info uses x86_quirks which is __init and is used only from smp_read_mpc and construct_default_ISA_mptable which are __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 678090508a6..9942fc72b63 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; -- cgit v1.2.3 From c72a5efec1193faa2ef34c0bd48d7251a70ec934 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:11:13 +0200 Subject: x86: mmconf: fix section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1591): Section mismatch in reference from the function init_amd() to the function .init.text:check_enable_amd_mmconf_dmi() The function __cpuinit init_amd() references a function __init check_enable_amd_mmconf_dmi(). If check_enable_amd_mmconf_dmi is only used by init_amd then annotate check_enable_amd_mmconf_dmi with a matching annotation. check_enable_amd_mmconf_dmi is only called from init_amd which is __cpuinit Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- include/asm-x86/mmconfig.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b36..efc2f361fe8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda07c6f..e293ab81e85 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -3,7 +3,7 @@ #ifdef CONFIG_PCI_MMCONFIG extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); +extern void __cpuinit check_enable_amd_mmconf_dmi(void); #else static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } -- cgit v1.2.3 From 39e00fe20aaad4326ed5e0e3221451732bc7f679 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:09:38 +0200 Subject: x86: mpparse.c: fix section mismatch warning WARNING: vmlinux.o(.text+0x118f7): Section mismatch in reference from the function construct_ioapic_table() to the function .init.text:MP_bus_info() The function construct_ioapic_table() references the function __init MP_bus_info(). This is often because construct_ioapic_table lacks a __init annotation or the annotation of MP_bus_info is wrong. construct_ioapic_table is called only from construct_default_ISA_mptable which is __init Signed-off-by: Marcin Slusarz Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9942fc72b63..b3fb430725c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) -- cgit v1.2.3 From e532c06f2a835b5cc4f4166f467437d9b09c1d0e Mon Sep 17 00:00:00 2001 From: David Fries Date: Sun, 17 Aug 2008 23:03:40 -0500 Subject: x86: fix i486 suspend to disk CR4 oops arch/x86/power/cpu_32.c __save_processor_state calls read_cr4() only a i486 CPU doesn't have the CR4 register. Trying to read it produces an invalid opcode oops during suspend to disk. Use the safe rc4 reading op instead. If the value to be written is zero the write is skipped. arch/x86/power/hibernate_asm_32.S done: swapped the use of %eax and %ecx to use jecxz for the zero test and jump over store to %cr4. restore_image: s/%ecx/%eax/ to be consistent with done: In addition to __save_processor_state, acpi_save_state_mem, efi_call_phys_prelog, and efi_call_phys_epilog had checks added (acpi restore was in assembly and already had a check for non-zero). There were other reads and writes of CR4, but MCE and virtualization shouldn't be executed on a i486 anyway. Signed-off-by: David Fries Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/efi_32.c | 4 ++-- arch/x86/power/cpu_32.c | 6 ++++-- arch/x86/power/hibernate_asm_32.S | 26 +++++++++++++++----------- 4 files changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 81e5ab6542d..426e5d91b63 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -86,7 +86,7 @@ int acpi_save_state_mem(void) #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4(); + header->pmode_cr4 = read_cr4_safe(); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13..5cab48ee61a 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = @@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 7dc5d5cf50a..d3e083dea72 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -45,7 +45,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); - ctxt->cr4 = read_cr4(); + ctxt->cr4 = read_cr4_safe(); } /* Needed by apm.c */ @@ -98,7 +98,9 @@ static void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ - write_cr4(ctxt->cr4); + /* cr4 was introduced in the Pentium CPU */ + if (ctxt->cr4) + write_cr4(ctxt->cr4); write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index b95aa6cfe3c..4fc7e872c85 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -28,9 +28,9 @@ ENTRY(swsusp_arch_suspend) ret ENTRY(restore_image) - movl resume_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 + movl resume_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 movl restore_pblist, %edx .p2align 4,,7 @@ -52,17 +52,21 @@ copy_loop: done: /* go back to the original page tables */ - movl $swapper_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 + movl $swapper_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 /* Flush TLB, including "global" things (vmalloc) */ - movl mmu_cr4_features, %eax - movl %eax, %edx + movl mmu_cr4_features, %ecx + jecxz 1f # cr4 Pentium and higher, skip if zero + movl %ecx, %edx andl $~(1<<7), %edx; # PGE movl %edx, %cr4; # turn off PGE - movl %cr3, %ecx; # flush TLB - movl %ecx, %cr3 - movl %eax, %cr4; # turn PGE back on +1: + movl %cr3, %eax; # flush TLB + movl %eax, %cr3 + jecxz 1f # cr4 Pentium and higher, skip if zero + movl %ecx, %cr4; # turn PGE back on +1: movl saved_context_esp, %esp movl saved_context_ebp, %ebp -- cgit v1.2.3 From d19fbfdfe6a7034c8b6a7062365780485ab5aeaa Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 17 Aug 2008 17:50:51 +0200 Subject: x86: silence section mismatch warning - get_local_pda Take out part of get_local_pda referencing __init function (free_bootmem) to new (static) function marked as __ref. It's safe to do because free_bootmem is called before __init sections are dropped. WARNING: vmlinux.o(.cpuinit.text+0x3cd7): Section mismatch in reference from the function get_local_pda() to the function .init.text:free_bootmem() The function __cpuinit get_local_pda() references a function __init free_bootmem(). If free_bootmem is only used by get_local_pda then annotate free_bootmem with a matching annotation. Signed-off-by: Marcin Slusarz Cc: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a8fb8a980fa..e139e617f42 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -756,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work) } #ifdef CONFIG_X86_64 + +/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ +static void __ref free_bootmem_pda(struct x8664_pda *oldpda) +{ + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); +} + /* * Allocate node local memory for the AP pda. * @@ -784,8 +792,7 @@ int __cpuinit get_local_pda(int cpu) if (oldpda) { memcpy(newpda, oldpda, size); - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, size); + free_bootmem_pda(oldpda); } newpda->in_bootmem = 0; -- cgit v1.2.3 From f71066624d5d91bf179a1ea25da0800b72e20c60 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 17 Aug 2008 17:50:52 +0200 Subject: x86, calgary: fix section mismatch warning - get_tce_space_from_tar WARNING: vmlinux.o(.text+0x27032): Section mismatch in reference from the function get_tce_space_from_tar() to the function .init.text:calgary_bus_has_devices() The function get_tce_space_from_tar() references the function __init calgary_bus_has_devices(). This is often because get_tce_space_from_tar lacks a __init annotation or the annotation of calgary_bus_has_devices is wrong. get_tce_space_from_tar is called only from __init function (calgary_init) and calls __init function (calgary_bus_has_devices). So annotate it properly. Signed-off-by: Marcin Slusarz Cc: Chandru Siddalingappa Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 02d19328525..218d783ed7a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) * Function for kdump case. Get the tce tables from first kernel * by reading the contents of the base adress register of calgary iommu */ -static void get_tce_space_from_tar(void) +static void __init get_tce_space_from_tar(void) { int bus; void __iomem *target; -- cgit v1.2.3 From 8a7c5ef3ba8cca8f9c4f91ddef3d081f517914ae Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 19 Aug 2008 02:13:55 +0200 Subject: x86 iommu: remove unneeded parenthesis The parenthesis in __iommu_queue_command() are not needed when assigning into 'target' variable. Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f2ede..69b4d060b21 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -65,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u8 *target; tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = (iommu->cmd_buf + tail); + target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); -- cgit v1.2.3 From c6744955d0ec0cb485c28c51eeb7185e260f6172 Mon Sep 17 00:00:00 2001 From: Samuel Sieb Date: Wed, 6 Aug 2008 22:06:29 -0700 Subject: x86: fix "kernel won't boot on a Cyrix MediaGXm (Geode)" Cyrix MediaGXm/Cx5530 Unicorn Revision 1.19.3B has stopped booting starting at v2.6.22. The reason is this commit: > commit f25f64ed5bd3c2932493681bdfdb483ea707da0a > Author: Juergen Beisert > Date: Sun Jul 22 11:12:38 2007 +0200 > > x86: Replace NSC/Cyrix specific chipset access macros by inlined functions. this commit activated a macro which was dormant before due to (buggy) macro side-effects. I've looked through various datasheets and found that the GXm and GXLV Geode processors don't have an incrementor. Remove the incrementor setup entirely. As the incrementor value differs according to clock speed and we would hope that the BIOS configures it correctly, it is probably the right solution. Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06..e710a21bb6e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -134,23 +134,6 @@ static void __cpuinit set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - /* * Configure later MediaGX and/or Geode processor. */ @@ -174,7 +157,6 @@ static void __cpuinit geode_configure(void) set_cx86_memwb(); set_cx86_reorder(); - set_cx86_inc(); local_irq_restore(flags); } -- cgit v1.2.3 From 99dd8713306a89f3e106143581244e550e00a644 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 19 Aug 2008 12:51:59 -0500 Subject: x86, SGI UV: hardcode the TLB flush interrupt system vector The UV TLB shootdown mechanism needs a system interrupt vector. Its vector had been hardcoded as 200, but needs to moved to the reserved system vector range so that it does not collide with some device vector. This is still temporary until dynamic system IRQ allocation is provided. But it will be needed when real UV hardware becomes available and runs 2.6.27. Signed-off-by: Cliff Wickman Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 3 ++- include/asm-x86/irq_vectors.h | 1 + include/asm-x86/uv/uv_bau.h | 5 ----- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab..8b8c0d6640f 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) uv_init_blade(blade, node, cur_cpu); cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); return 0; diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index b95d167b7fb..a48c7f2dbdc 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -76,6 +76,7 @@ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa #define THRESHOLD_APIC_VECTOR 0xf9 +#define UV_BAU_MESSAGE 0xf8 #define INVALIDATE_TLB_VECTOR_END 0xf7 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h index 91ac0dfb758..610b6b308e9 100644 --- a/include/asm-x86/uv/uv_bau.h +++ b/include/asm-x86/uv/uv_bau.h @@ -40,11 +40,6 @@ #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 -#define UV_BAU_MESSAGE 200 -/* - * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h - * To be dynamically allocated in the future - */ #define UV_NET_ENDPOINT_INTD 0x38 #define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 -- cgit v1.2.3 From 85d577979a8a94a32ca1d828a91c2eb8300bc9f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 18 Aug 2008 11:58:17 +0200 Subject: werror: fix pci calgary Fix an integer comparison always false warning in the PCI Calgary 64 driver. A u8 is being compared to something that's 512 by default, resulting in the following warning: arch/x86/kernel/pci-calgary_64.c:1285: warning: comparison is always false due to limited range of data type This was introduced by patch b34e90b8f0f30151349134f87b5dc6ef75a5218c. Signed-off-by: David Howells Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783ed7a..363a74ec7a7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1269,13 +1269,15 @@ static inline int __init determine_tce_table_size(u64 ram) static int __init build_detail_arrays(void) { unsigned long ptr; - int i, scal_detail_size, rio_detail_size; + unsigned numnodes, i; + int scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + numnodes = rio_table_hdr->num_scal_dev; + if (numnodes > MAX_NUMNODES){ printk(KERN_WARNING "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", - MAX_NUMNODES, rio_table_hdr->num_scal_dev); + MAX_NUMNODES, numnodes); return -ENODEV; } @@ -1296,8 +1298,7 @@ static int __init build_detail_arrays(void) } ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; - i++, ptr += scal_detail_size) + for (i = 0; i < numnodes; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; for (i = 0; i < rio_table_hdr->num_rio_dev; -- cgit v1.2.3 From bde78a79a6eb015f33aa4660df1b06f5135def20 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 8 Jul 2008 09:51:56 -0700 Subject: x86: use WARN() in arch/x86/kernel Use WARN() instead of a printk+WARN_ON() pair; this way the message becomes part of the warning section for better reporting/collection. This also allowed the folding of some if()'s into the WARN() Signed-off-by: Arjan van de Ven Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 5 +---- arch/x86/kernel/pci-calgary_64.c | 3 +-- arch/x86/kernel/tsc_sync.c | 6 ++---- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8fa..b117d7f8a56 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - if (!kvm_para_available()) { - printk(KERN_WARNING + WARN(!kvm_para_available(), KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); - WARN_ON(1); - } return 0; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 363a74ec7a7..dcdac6c826e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { - printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " + WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); - WARN_ON(1); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89..9ffb01c31c4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) __raw_spin_unlock(&sync_lock); } } - if (!(now-start)) { - printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", + WARN(!(now-start), + "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", now-start, end-start); - WARN_ON(1); - } } /* -- cgit v1.2.3 From 7946612de2087e163308e26034286fc2dc9dacf1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 20 Aug 2008 11:31:07 -0700 Subject: x86: export pv_lock_ops non-GPL None of the spinlock API is exported GPL, so there's no reason for pv_lock_ops to be. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: drago01 --- arch/x86/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..300da17e61c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); -- cgit v1.2.3 From b2a6a58ca6a3ddf4e278a53199c5b8fd39adbc0e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 20 Aug 2008 18:18:26 +0200 Subject: x86: fix BUG: unable to handle kernel paging request (numaq_tsc_disable) This section mismatch: >> Seems to be a section mismatch; init_intel() is __cpuinit while >> numaq_tsc_disable() is __init. Seems to be introduced in: >> >> commit 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a >> Author: Yinghai Lu >> Date: Sat Jul 19 18:01:16 2008 -0700 >> >> x86: extend and use x86_quirks to clean up NUMAQ code > > Oops, I am wrong about numaq_tsc_disable() being __init. Still, I > believe that Yinghai might be able to say what's really wrong :-) Would lead to this crash: BUG: unable to handle kernel paging request at c08a45f0 IP: [] numaq_tsc_disable+0x0/0x40 Fixed by the patch below. Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20..eecc8c18f01 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) } -void __init numaq_tsc_disable(void) +void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) return; -- cgit v1.2.3 From 8323444b5dba3fe55e56a95d20d8f55c1d6745af Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 20 Aug 2008 16:45:53 -0700 Subject: x86: PAT Update validate_pat_support for intel CPUs Pentium III and Core Solo/Duo CPUs have an erratum " Page with PAT set to WC while associated MTRR is UC may consolidate to UC " which can result in WC setting in PAT to be ineffective. We will disable PAT on such CPUs, so that we can continue to use MTRR WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a607..a6ef672adbb 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) return; - break; + + pat_disable("PAT WC disabled due to known CPU erratum."); + return; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: -- cgit v1.2.3 From 38cc1c3df77c1bb739a4766788eb9fa49f16ffdf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 20:24:24 -0700 Subject: x86: work around MTRR mask setting Joshua Hoblitt reported that only 3 GB of his 16 GB of RAM is usable. Booting with mtrr_show showed us the BIOS-initialized MTRR settings - which are all wrong. So the root cause is that the BIOS has not set the mask correctly: > [ 0.429971] MSR00000200: 00000000d0000000 > [ 0.433305] MSR00000201: 0000000ff0000800 > should be ==> [ 0.433305] MSR00000201: 0000003ff0000800 > > [ 0.436638] MSR00000202: 00000000e0000000 > [ 0.439971] MSR00000203: 0000000fe0000800 > should be ==> [ 0.439971] MSR00000203: 0000003fe0000800 > > [ 0.443304] MSR00000204: 0000000000000006 > [ 0.446637] MSR00000205: 0000000c00000800 > should be ==> [ 0.446637] MSR00000205: 0000003c00000800 > > [ 0.449970] MSR00000206: 0000000400000006 > [ 0.453303] MSR00000207: 0000000fe0000800 > should be ==> [ 0.453303] MSR00000207: 0000003fe0000800 > > [ 0.456636] MSR00000208: 0000000420000006 > [ 0.459970] MSR00000209: 0000000ff0000800 > should be ==> [ 0.459970] MSR00000209: 0000003ff0000800 So detect this borkage and add the prefix 111. Signed-off-by: Yinghai Lu Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eac..43102e03e2d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; + /* Expand tmp with high bits to all 1s*/ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); + + if (tmp != mask_lo) { + WARN_ON("mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + mask_lo = tmp; + } + } /* This works correctly if size is a power of two, i.e. a contiguous range. */ -- cgit v1.2.3