diff options
Diffstat (limited to 'arch/x86')
83 files changed, 1147 insertions, 1085 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d14cc6b79ad..2787fbec7aed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,11 +77,13 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP - select HAVE_BPF_JIT if (X86_64 && NET) + select HAVE_BPF_JIT if X86_64 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP - select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC + select DCACHE_WORD_ACCESS + select GENERIC_SMP_IDLE_THREAD + select HAVE_ARCH_SECCOMP_FILTER config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -160,9 +162,6 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD -config ARCH_HAS_CPU_IDLE_WAIT - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 41a7237606a3..277418ff8b52 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +archscripts: + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + ### # Syscall table generation @@ -146,7 +149,6 @@ archheaders: head-y := arch/x86/kernel/head_$(BITS).o head-y += arch/x86/kernel/head$(BITS).o head-y += arch/x86/kernel/head.o -head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fd55a2ff3ad8..e398bb5d63bb 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +targets += vmlinux.bin.all vmlinux.relocs -targets += vmlinux.bin.all vmlinux.relocs relocs -hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs - +CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index a0559930a180..c85e3ac99bba 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -33,6 +33,9 @@ __HEAD ENTRY(startup_32) #ifdef CONFIG_EFI_STUB + jmp preferred_addr + + .balign 0x10 /* * We don't need the return address, so set up the stack so * efi_main() can find its arugments. @@ -41,12 +44,17 @@ ENTRY(startup_32) call efi_main cmpl $0, %eax - je preferred_addr movl %eax, %esi - call 1f + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popl %eax - subl $1b, %eax + subl $3b, %eax subl BP_pref_address(%esi), %eax add BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 558d76ce23bc..87e03a13d8e3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -200,18 +200,28 @@ ENTRY(startup_64) * entire text+data+bss and hopefully all of memory. */ #ifdef CONFIG_EFI_STUB - pushq %rsi + /* + * The entry point for the PE/COFF executable is 0x210, so only + * legacy boot loaders will execute this jmp. + */ + jmp preferred_addr + + .org 0x210 mov %rcx, %rdi mov %rdx, %rsi call efi_main - popq %rsi - cmpq $0,%rax - je preferred_addr movq %rax,%rsi - call 1f + cmpq $0,%rax + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popq %rax - subq $1b, %rax + subq $3b, %rax subq BP_pref_address(%rsi), %rax add BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index ed549767a231..24443a332083 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -205,8 +205,13 @@ int main(int argc, char ** argv) put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); #ifdef CONFIG_X86_32 - /* Address of entry point */ - put_unaligned_le32(i, &buf[pe_header + 0x28]); + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); @@ -217,9 +222,11 @@ int main(int argc, char ** argv) /* * Address of entry point. startup_32 is at the beginning and * the 64-bit entry point (startup_64) is always 512 bytes - * after. + * after. The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation */ - put_unaligned_le32(i + 512, &buf[pe_header + 0x28]); + put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d511d951a052..07b3a68d2d29 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end) end = PAGE_ALIGN(end); if (end <= start) return; - down_write(¤t->mm->mmap_sem); - do_brk(start, end - start); - up_write(¤t->mm->mmap_sem); + vm_brk(start, end - start); } #ifdef CORE_DUMP @@ -296,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); - current->mm->context.ia32_compat = 1; + set_personality_ia32(false); setup_new_exec(bprm); @@ -332,9 +329,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) pos = 32; map_size = ex.a_text+ex.a_data; - down_write(¤t->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); - up_write(¤t->mm->mmap_sem); + error = vm_brk(text_addr & PAGE_MASK, map_size); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); @@ -373,9 +368,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; - down_write(¤t->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - up_write(¤t->mm->mmap_sem); + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); @@ -385,26 +378,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto beyond_if; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset); - up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; @@ -476,9 +465,7 @@ static int load_aout_library(struct file *file) error_time = jiffies; } #endif - down_write(¤t->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - up_write(¤t->mm->mmap_sem); + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); @@ -490,12 +477,10 @@ static int load_aout_library(struct file *file) goto out; } /* Now use mmap to map the library into memory. */ - down_write(¤t->mm->mmap_sem); - error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, N_TXTOFF(ex)); - up_write(¤t->mm->mmap_sem); retval = error; if (error != start_addr) goto out; @@ -503,9 +488,7 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - down_write(¤t->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); - up_write(¤t->mm->mmap_sem); + error = vm_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) goto out; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index a69245ba27e3..0b3f2354f6aa 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -67,6 +67,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; + case __SI_SYS >> 16: + put_user_ex(from->si_syscall, &to->si_syscall); + put_user_ex(from->si_arch, &to->si_arch); + break; case __SI_CHLD >> 16: if (ia32) { put_user_ex(from->si_utime, &to->si_utime); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 5e1a2eef3e7c..b13fe63bdc59 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -19,7 +19,7 @@ #ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT #else -#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER) #endif #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index b3b733262909..99480e55973d 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -43,7 +43,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock #op "b %b0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ + : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ @@ -173,7 +173,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock "addb %b1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ + : "+m" (*(ptr)) : "qi" (inc) \ : "memory", "cc"); \ break; \ case __X86_CASE_W: \ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index ee52760549f0..b04cbdb138cd 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -144,6 +144,12 @@ typedef struct compat_siginfo { int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + struct { + unsigned int _call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 47d99934580f..5fb9bbbd2f14 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,45 +1,101 @@ -#ifndef _ASM_X86_IRQ_REMAPPING_H -#define _ASM_X86_IRQ_REMAPPING_H +/* + * Copyright (C) 2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This header file contains the interface of the interrupt remapping code to + * the x86 interrupt management code. + */ -#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) +#ifndef __X86_IRQ_REMAPPING_H +#define __X86_IRQ_REMAPPING_H + +#include <asm/io_apic.h> #ifdef CONFIG_IRQ_REMAP -static void irq_remap_modify_chip_defaults(struct irq_chip *chip); -static inline void prepare_irte(struct irte *irte, int vector, - unsigned int dest) + +extern int irq_remapping_enabled; + +extern void setup_irq_remapping_ops(void); +extern int irq_remapping_supported(void); +extern int irq_remapping_prepare(void); +extern int irq_remapping_enable(void); +extern void irq_remapping_disable(void); +extern int irq_remapping_reenable(int); +extern int irq_remap_enable_fault_handling(void); +extern int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, + int vector, + struct io_apic_irq_attr *attr); +extern int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); +extern void free_remapped_irq(int irq); +extern void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id); +extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 + +static inline void setup_irq_remapping_ops(void) { } +static inline int irq_remapping_supported(void) { return 0; } +static inline int irq_remapping_prepare(void) { return -ENODEV; } +static inline int irq_remapping_enable(void) { return -ENODEV; } +static inline void irq_remapping_disable(void) { } +static inline int irq_remapping_reenable(int eim) { return -ENODEV; } +static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } +static inline int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, + int vector, + struct io_apic_irq_attr *attr) +{ + return -ENODEV; +} +static inline int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) { - memset(irte, 0, sizeof(*irte)); - - irte->present = 1; - irte->dst_mode = apic->irq_dest_mode; - /* - * Trigger mode in the IRTE will always be edge, and for IO-APIC, the - * actual level or edge trigger will be setup in the IO-APIC - * RTE. This will help simplify level triggered irq migration. - * For more details, see the comments (in io_apic.c) explainig IO-APIC - * irq migration in the presence of interrupt-remapping. - */ - irte->trigger_mode = 0; - irte->dlvry_mode = apic->irq_delivery_mode; - irte->vector = vector; - irte->dest_id = IRTE_DEST(dest); - irte->redir_hint = 1; + return 0; } -static inline bool irq_remapped(struct irq_cfg *cfg) +static inline void free_remapped_irq(int irq) { } +static inline void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) { - return cfg->irq_2_iommu.iommu != NULL; } -#else -static void prepare_irte(struct irte *irte, int vector, unsigned int dest) +static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { + return -ENODEV; } -static inline bool irq_remapped(struct irq_cfg *cfg) +static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) { - return false; + return -ENODEV; } -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { + return -ENODEV; } -#endif +#endif /* CONFIG_IRQ_REMAP */ -#endif /* _ASM_X86_IRQ_REMAPPING_H */ +#endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c3767cfac..183922e13de1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -170,6 +170,9 @@ static inline int kvm_para_available(void) unsigned int eax, ebx, ecx, edx; char signature[13]; + if (boot_cpu_data.cpuid_level < 0) + return 0; /* So we don't blow up on old processors */ + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); memcpy(signature + 0, &ebx, 4); memcpy(signature + 4, &ecx, 4); diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index fd3f9f18cf3f..0e3793b821ef 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void); enum { NMI_LOCAL=0, NMI_UNKNOWN, + NMI_SERR, + NMI_IO_CHECK, NMI_MAX }; @@ -35,8 +37,24 @@ enum { typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); -int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, - const char *); +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + unsigned long flags; + const char *name; +}; + +#define register_nmi_handler(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + +int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index ade619ff9e2a..ef17af013475 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -15,8 +15,8 @@ */ #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) -#define THREAD_ORDER 1 -#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 7639dbf5d223..320f7bb95f76 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,8 +1,8 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_ORDER 1 -#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) #define EXCEPTION_STACK_ORDER 0 diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index 3427b7798dbc..7ef7c3020e5c 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,9 +7,9 @@ #else # ifdef __i386__ # include "posix_types_32.h" -# elif defined(__LP64__) -# include "posix_types_64.h" -# else +# elif defined(__ILP32__) # include "posix_types_x32.h" +# else +# include "posix_types_64.h" # endif #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fa7dcceb6c0..ccbb1ea99ccb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -974,8 +974,6 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ -void cpu_idle_wait(void); - extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 4a085383af27..5ca71c065eef 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -257,7 +257,7 @@ struct sigcontext { __u64 oldmask; __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ -#ifndef __LP64__ +#ifdef __ILP32__ __u32 __fpstate_pad; #endif __u64 reserved1[8]; diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h index fc1aa5535646..34c47b3341c0 100644 --- a/arch/x86/include/asm/siginfo.h +++ b/arch/x86/include/asm/siginfo.h @@ -2,7 +2,13 @@ #define _ASM_X86_SIGINFO_H #ifdef __x86_64__ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# ifdef __ILP32__ /* x32 */ +typedef long long __kernel_si_clock_t __attribute__((aligned(4))); +# define __ARCH_SI_CLOCK_T __kernel_si_clock_t +# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) +# else /* x86-64 */ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# endif #endif #include <asm-generic/siginfo.h> diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 0434c400287c..f8cbc6f20e31 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -62,6 +62,8 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); /* Static state in head.S used to set up a CPU */ extern unsigned long stack_start; /* Initial stack pointer address */ +struct task_struct; + struct smp_ops { void (*smp_prepare_boot_cpu)(void); void (*smp_prepare_cpus)(unsigned max_cpus); @@ -70,7 +72,7 @@ struct smp_ops { void (*stop_other_cpus)(int wait); void (*smp_send_reschedule)(int cpu); - int (*cpu_up)(unsigned cpu); + int (*cpu_up)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); @@ -113,9 +115,9 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu) +static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - return smp_ops.cpu_up(cpu); + return smp_ops.cpu_up(cpu, tidle); } static inline int __cpu_disable(void) @@ -152,7 +154,7 @@ void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus); -int native_cpu_up(unsigned int cpunum); +int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); void native_cpu_die(unsigned int cpu); void native_play_dead(void); @@ -162,6 +164,7 @@ int wbinvd_on_all_cpus(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); +void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h index e0b1d9bbcbc6..7b3ddc348585 100644 --- a/arch/x86/include/asm/stat.h +++ b/arch/x86/include/asm/stat.h @@ -25,6 +25,12 @@ struct stat { unsigned long __unused5; }; +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT_PADDING(st) do { \ + st.__unused4 = 0; \ + st.__unused5 = 0; \ +} while (0) + #define STAT64_HAS_BROKEN_ST_INO 1 /* This matches struct stat64 in glibc2.1, hence the absolutely @@ -63,6 +69,12 @@ struct stat64 { unsigned long long st_ino; }; +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT64_PADDING(st) do { \ + memset(&st.__pad0, 0, sizeof(st.__pad0)); \ + memset(&st.__pad3, 0, sizeof(st.__pad3)); \ +} while (0) + #else /* __i386__ */ struct stat { @@ -87,6 +99,15 @@ struct stat { unsigned long st_ctime_nsec; long __unused[3]; }; + +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT_PADDING(st) do { \ + st.__pad0 = 0; \ + st.__unused[0] = 0; \ + st.__unused[1] = 0; \ + st.__unused[2] = 0; \ +} while (0) + #endif /* for 32bit emulation and 32 bit kernels */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 386b78686c4d..1ace47b62592 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -13,9 +13,11 @@ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H +#include <linux/audit.h> #include <linux/sched.h> #include <linux/err.h> #include <asm/asm-offsets.h> /* For NR_syscalls */ +#include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> extern const unsigned long sys_call_table[]; @@ -88,6 +90,12 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } +static inline int syscall_get_arch(struct task_struct *task, + struct pt_regs *regs) +{ + return AUDIT_ARCH_I386; +} + #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, @@ -212,6 +220,25 @@ static inline void syscall_set_arguments(struct task_struct *task, } } +static inline int syscall_get_arch(struct task_struct *task, + struct pt_regs *regs) +{ +#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entry and then + * remains set until we return to user mode. + * + * TIF_IA32 tasks should always have TS_COMPAT set at + * system call time. + * + * x32 tasks should be considered AUDIT_ARCH_X86_64. + */ + if (task_thread_info(task)->status & TS_COMPAT) + return AUDIT_ARCH_I386; +#endif + /* Both x32 and x86_64 are considered "64-bit". */ + return AUDIT_ARCH_X86_64; +} #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6df8ccd715..73cfe0d309c9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -155,24 +155,6 @@ struct thread_info { #define PREEMPT_ACTIVE 0x10000000 -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) -#else -#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) -#endif - -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - -#define alloc_thread_info_node(tsk, node) \ -({ \ - struct page *page = alloc_pages_node(node, THREAD_FLAGS, \ - THREAD_ORDER); \ - struct thread_info *ret = page ? page_address(page) : NULL; \ - \ - ret; \ -}) - #ifdef CONFIG_X86_32 #define STACK_WARN (THREAD_SIZE/8) @@ -282,8 +264,7 @@ static inline bool is_ia32_task(void) #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); -extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -#define arch_task_cache_init arch_task_cache_init +extern void arch_release_task_struct(struct task_struct *tsk); #endif #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54d9360..e0544597cfe7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803cc602..8084bc73b18c 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30971ad..fcd4b6f3ef02 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 37cdc9d99bb1..4437001d8e3d 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -63,10 +63,10 @@ #else # ifdef __i386__ # include <asm/unistd_32.h> -# elif defined(__LP64__) -# include <asm/unistd_64.h> -# else +# elif defined(__ILP32__) # include <asm/unistd_x32.h> +# else +# include <asm/unistd_64.h> # endif #endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 6fe6767b7124..e58f03b206c3 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a) return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); } +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, dummy; + + asm( + "1:\tmov %2,%0\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3:\t" + "lea %2,%1\n\t" + "and %3,%1\n\t" + "mov (%1),%0\n\t" + "leal %2,%%ecx\n\t" + "andl %4,%%ecx\n\t" + "shll $3,%%ecx\n\t" + "shr %%cl,%0\n\t" + "jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + :"=&r" (ret),"=&c" (dummy) + :"m" (*(unsigned long *)addr), + "i" (-sizeof(unsigned long)), + "i" (sizeof(unsigned long)-1)); + return ret; +} + #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index baaca8defec8..764b66a4cf89 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -195,6 +195,5 @@ extern struct x86_msi_ops x86_msi; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); -extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node); #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 532d2e090e6f..56ebd1f98447 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds +extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a415b1f44365..7c439fe4941b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 103b6ab368d3..146a49c763a4 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags; static char temp_stack[4096]; #endif +asmlinkage void acpi_enter_s3(void) +{ + acpi_enter_sleep_state(3, wake_sleep_flags); +} /** * acpi_suspend_lowlevel - save kernel state * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 416d4be13fef..d68677a2a010 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,12 +3,16 @@ */ #include <asm/trampoline.h> +#include <linux/linkage.h> extern unsigned long saved_video_mode; extern long saved_magic; extern int wakeup_pmode_return; +extern u8 wake_sleep_flags; +extern asmlinkage void acpi_enter_s3(void); + extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab720573e3..72610839f03b 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -74,9 +74,7 @@ restore_registers: ENTRY(do_suspend_lowlevel) call save_processor_state call save_registers - pushl $3 - call acpi_enter_sleep_state - addl $4, %esp + call acpi_enter_s3 # In case of S3 failure, we'll emerge here. Jump # to ret_point to recover diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8ea5164cbd04..014d1d28c397 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel) movq %rsi, saved_rsi addq $8, %rsp - movl $3, %edi - xorl %eax, %eax - call acpi_enter_sleep_state + call acpi_enter_s3 /* in case something went wrong, restore the machine status and go on */ jmp resume_point diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11544d8f1e97..3722179a49db 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -35,6 +35,7 @@ #include <linux/smp.h> #include <linux/mm.h> +#include <asm/irq_remapping.h> #include <asm/perf_event.h> #include <asm/x86_init.h> #include <asm/pgalloc.h> @@ -1441,8 +1442,8 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); + if (irq_remapping_enabled) + irq_remap_enable_fault_handling(); } @@ -1517,7 +1518,7 @@ void enable_x2apic(void) int __init enable_IR(void) { #ifdef CONFIG_IRQ_REMAP - if (!intr_remapping_supported()) { + if (!irq_remapping_supported()) { pr_debug("intr-remapping not supported\n"); return -1; } @@ -1528,7 +1529,7 @@ int __init enable_IR(void) return -1; } - return enable_intr_remapping(); + return irq_remapping_enable(); #endif return -1; } @@ -1537,10 +1538,13 @@ void __init enable_IR_x2apic(void) { unsigned long flags; int ret, x2apic_enabled = 0; - int dmar_table_init_ret; + int hardware_init_ret; - dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret && !x2apic_supported()) + /* Make sure irq_remap_ops are initialized */ + setup_irq_remapping_ops(); + + hardware_init_ret = irq_remapping_prepare(); + if (hardware_init_ret && !x2apic_supported()) return; ret = save_ioapic_entries(); @@ -1556,7 +1560,7 @@ void __init enable_IR_x2apic(void) if (x2apic_preenabled && nox2apic) disable_x2apic(); - if (dmar_table_init_ret) + if (hardware_init_ret) ret = -1; else ret = enable_IR(); @@ -1637,9 +1641,11 @@ static int __init apic_verify(void) mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + } pr_info("Found and enabled local APIC!\n"); return 0; @@ -1657,13 +1663,15 @@ int __init apic_force_enable(unsigned long addr) * MSR. This can only be done in software for Intel P6 or later * and AMD K7 (Model > 1) or later. */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | addr; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | addr; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } } return apic_verify(); } @@ -2172,8 +2180,8 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (intr_remapping_enabled) - disable_intr_remapping(); + if (irq_remapping_enabled) + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2189,7 +2197,7 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { /* * IO-APIC and PIC have their own resume routines. * We just mask them here to make sure the interrupt @@ -2209,10 +2217,12 @@ static void lapic_resume(void) * FIXME! This will be wrong if we ever support suspend on * SMP! We'll need to do this as part of the CPU restore! */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } } maxlvt = lapic_get_maxlvt(); @@ -2239,8 +2249,8 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (intr_remapping_enabled) - reenable_intr_remapping(x2apic_mode); + if (irq_remapping_enabled) + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 899803e03214..23e75422e013 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -207,8 +207,11 @@ static void __init map_csrs(void) static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; + + if (c->phys_proc_id != node) { + c->phys_proc_id = node; + per_cpu(cpu_llc_id, smp_processor_id()) = node; + } } static int __init numachip_system_init(void) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e88300d8e80a..ef0648cd7084 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -86,6 +86,22 @@ void __init set_io_apic_ops(const struct io_apic_ops *ops) io_apic_ops = *ops; } +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return cfg->irq_2_iommu.iommu != NULL; +} +#else +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} +#endif + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -1361,77 +1377,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } - -static int setup_ir_ioapic_entry(int irq, - struct IR_IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - int index; - struct irte irte; - int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); - - if (!iommu) { - pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - return -ENODEV; - } - - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); - return -ENOMEM; - } - - prepare_irte(&irte, vector, destination); - - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, ioapic_id); - - modify_irte(irq, &irte); - - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - attr->ioapic, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); - - memset(entry, 0, sizeof(*entry)); - - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = attr->ioapic_pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr) { - if (intr_remapping_enabled) - return setup_ir_ioapic_entry(irq, - (struct IR_IO_APIC_route_entry *)entry, - destination, vector, attr); + if (irq_remapping_enabled) + return setup_ioapic_remapped_entry(irq, entry, destination, + vector, attr); memset(entry, 0, sizeof(*entry)); @@ -1588,7 +1540,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, { struct IO_APIC_route_entry entry; - if (intr_remapping_enabled) + if (irq_remapping_enabled) return; memset(&entry, 0, sizeof(entry)); @@ -1674,7 +1626,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" " Pol Stat Indx2 Zero Vect:\n"); } else { @@ -1683,7 +1635,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) } for (i = 0; i <= reg_01.bits.entries; i++) { - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { struct IO_APIC_route_entry entry; struct IR_IO_APIC_route_entry *ir_entry; @@ -2050,7 +2002,7 @@ void disable_IO_APIC(void) * IOAPIC RTE as well as interrupt-remapping table entry). * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { + if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -2074,7 +2026,7 @@ void disable_IO_APIC(void) * Use virtual wire A mode when interrupt remapping is enabled. */ if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!intr_remapping_enabled && + disconnect_bsp_APIC(!irq_remapping_enabled && ioapic_i8259.pin != -1); } @@ -2390,71 +2342,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -#ifdef CONFIG_IRQ_REMAP - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For both level and edge triggered, irq migration is a simple atomic - * update(of vector and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we eliminate the io-apic RTE modification (with the - * updated vector information), by using a virtual vector (io-apic pin number). - * Real vector that is used for interrupting cpu will be coming from - * the interrupt-remapping table entry. - * - * As the migration is a simple atomic update of IRTE, the same mechanism - * is used to migrate MSI irq's in the presence of interrupt-remapping. - */ -static int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - cpumask_copy(data->affinity, mask); - return 0; -} - -#else -static inline int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - return 0; -} -#endif - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -2699,7 +2586,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_eoi = ir_ack_apic_level; #ifdef CONFIG_SMP - chip->irq_set_affinity = ir_ioapic_set_affinity; + chip->irq_set_affinity = set_remapped_irq_affinity; #endif } #endif /* CONFIG_IRQ_REMAP */ @@ -2912,7 +2799,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; @@ -2945,7 +2832,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); @@ -3169,7 +3056,7 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); if (irq_remapped(cfg)) - free_irte(irq); + free_remapped_irq(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); @@ -3198,54 +3085,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); if (irq_remapped(cfg)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); - - modify_irte(irq, &irte); + compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); + return err; + } + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else { - if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } return err; } @@ -3288,33 +3155,6 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} - static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { struct irq_chip *chip = &msi_chip; @@ -3345,7 +3185,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; - struct intel_iommu *iommu = NULL; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) @@ -3359,7 +3198,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; - if (!intr_remapping_enabled) + if (!irq_remapping_enabled) goto no_ir; if (!sub_handle) { @@ -3367,23 +3206,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) * allocate the consecutive block of IRTE's * for 'nvec' */ - index = msi_alloc_irte(dev, irq, nvec); + index = msi_alloc_remapped_irq(dev, irq, nvec); if (index < 0) { ret = index; goto error; } } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; + ret = msi_setup_remapped_irq(dev, irq, index, + sub_handle); + if (ret < 0) goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); } no_ir: ret = setup_msi_irq(dev, msidesc, irq); @@ -3501,15 +3333,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) struct msi_msg msg; int ret; - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_hpet_to_ir(id); - int index; - - if (!iommu) - return -1; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) + if (irq_remapping_enabled) { + if (!setup_hpet_msi_remapped(irq, id)) return -1; } @@ -3888,8 +3713,8 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (intr_remapping_enabled) - ir_ioapic_set_affinity(idata, mask, false); + if (irq_remapping_enabled) + set_remapped_irq_affinity(idata, mask, false); else ioapic_set_affinity(idata, mask, false); } diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8a778db45e3a..991e315f4227 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (x2apic_phys) return x2apic_enabled(); + else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && + x2apic_enabled()) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return 1; + } else return 0; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 459e78cbf61e..07b0c0db466c 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2401,7 +2401,7 @@ static void __exit apm_exit(void) * (pm_idle), Wait for all processors to update cached/local * copies of pm_idle before proceeding. */ - cpu_idle_wait(); + kick_all_cpus_sync(); } if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) && (apm_info.connection_version > 0x0100)) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0a44b90602b0..146bb6218eec 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,7 +26,8 @@ * contact AMD for precise details and a CPU swap. * * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html + * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6" + * (Publication # 21266 Issue Date: August 1998) * * The following test is erm.. interesting. AMD neglected to up * the chip setting when fixing the bug but they also tweaked some @@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) "system stability may be impaired when more than 32 MB are used.\n"); else printk(KERN_CONT "probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } /* K6 with old style WHCR */ @@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = per_cpu(cpu_llc_id, cpu); /* - * If core numbers are inconsistent, it's likely a multi-fabric platform, - * so invoke platform-specific handler + * On multi-fabric platform (e.g. Numascale NumaChip) a + * platform-specific handler needs to be called to fixup some + * IDs of the CPU. */ - if (c->phys_proc_id != node) + if (x86_cpuinit.fixup_cpu_id) x86_cpuinit.fixup_cpu_id(c, node); if (!node_online(node)) { @@ -579,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* re-enable TopologyExtensions if switched off by BIOS */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + u64 val; + + if (!rdmsrl_amd_safe(0xc0011005, &val)) { + val |= 1ULL << 54; + wrmsrl_amd_safe(0xc0011005, val); + rdmsrl(0xc0011005, val); + if (val & (1ULL << 54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + printk(KERN_INFO FW_INFO "CPU: Re-enabling " + "disabled Topology Extensions Support\n"); + } + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e258362a3d..cf79302198a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1163,15 +1163,6 @@ static void dbg_restore_debug_regs(void) #endif /* ! CONFIG_KGDB */ /* - * Prints an error where the NUMA and configured core-number mismatch and the - * platform didn't override this to fix it up - */ -void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node) -{ - pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id); -} - -/* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 73d08ed98a64..b8f3653dddbc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, /* check if @slot is already used or the index is already disabled */ ret = amd_get_l3_disable_slot(nb, slot); if (ret >= 0) - return -EINVAL; + return -EEXIST; if (index > nb->l3_cache.indices) return -EINVAL; /* check whether the other slot has disabled the same index already */ if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EINVAL; + return -EEXIST; amd_l3_disable_index(nb, cpu, slot, index); @@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); if (err) { if (err == -EEXIST) - printk(KERN_WARNING "L3 disable slot %d in use!\n", - slot); + pr_warning("L3 slot %d in use/index already disabled!\n", + slot); return err; } return count; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09c087d..11c9166c3337 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -945,9 +945,10 @@ struct mce_info { atomic_t inuse; struct task_struct *t; __u64 paddr; + int restartable; } mce_info[MCE_INFO_MAX]; -static void mce_save_info(__u64 addr) +static void mce_save_info(__u64 addr, int c) { struct mce_info *mi; @@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr) if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { mi->t = current; mi->paddr = addr; + mi->restartable = c; return; } } @@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { /* schedule action before return to userland */ - mce_save_info(m.addr); + mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); set_thread_flag(TIF_MCE_NOTIFY); } else if (kill_it) { force_sig(SIGBUS, current); @@ -1179,7 +1181,13 @@ void mce_notify_process(void) pr_err("Uncorrected hardware memory error in user-access at %llx", mi->paddr); - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || + mi->restartable == 0) { pr_err("Memory error not recovered"); force_sig(SIGBUS, current); } diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7734bcbb5a3a..2d6e6498c176 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -235,6 +235,7 @@ int init_fpu(struct task_struct *tsk) if (tsk_used_math(tsk)) { if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); + tsk->thread.fpu.last_cpu = ~0; return 0; } diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c deleted file mode 100644 index 43e9ccf44947..000000000000 --- a/arch/x86/kernel/init_task.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data..cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 58b7f27cb3e9..344faf8d0d62 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu) return; irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); + THREADINFO_GFP, + THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; @@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu) per_cpu(hardirq_ctx, cpu) = irqctx; irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); + THREADINFO_GFP, + THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ba6e4a27e4..e554e5ad2fe8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -79,7 +79,6 @@ struct kvm_task_sleep_node { u32 token; int cpu; bool halted; - struct mm_struct *mm; }; static struct kvm_task_sleep_head { @@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.mm = current->active_mm; n.halted = idle || preempt_count() > 1; - atomic_inc(&n.mm->mm_count); init_waitqueue_head(&n.wq); hlist_add_head(&n.link, &b->list); spin_unlock(&b->lock); @@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); - if (!n->mm) - return; - mmdrop(n->mm); if (n->halted) smp_send_reschedule(n->cpu); else if (waitqueue_active(&n->wq)) @@ -207,7 +201,7 @@ again: * async PF was not yet handled. * Add dummy entry for the token. */ - n = kmalloc(sizeof(*n), GFP_ATOMIC); + n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { /* * Allocation failed! Busy wait while other cpu @@ -219,7 +213,6 @@ again: } n->token = token; n->cpu = smp_processor_id(); - n->mm = NULL; init_waitqueue_head(&n->wq); hlist_add_head(&n->link, &b->list); } else diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 73465aab28f8..8a2ce8fd41c0 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -82,11 +82,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); - return -1; - } - csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); @@ -380,6 +375,13 @@ static struct microcode_ops microcode_amd_ops = { struct microcode_ops * __init init_amd_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + return NULL; + } + patch = (void *)get_zeroed_page(GFP_KERNEL); if (!patch) return NULL; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 87a0f8688301..c9bda6d6035c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -419,10 +419,8 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) { - sysfs_remove_group(&dev->kobj, &mc_attr_group); + if (microcode_init_cpu(cpu) == UCODE_ERROR) return -EINVAL; - } return err; } @@ -528,11 +526,11 @@ static int __init microcode_init(void) microcode_ops = init_intel_microcode(); else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); - - if (!microcode_ops) { + else pr_err("no support for this CPU vendor\n"); + + if (!microcode_ops) return -ENODEV; - } microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3ca42d0e43a2..0327e2b3c408 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("CPU%d not a capable Intel processor\n", cpu_num); - return -1; - } - csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { @@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = { struct microcode_ops * __init init_intel_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + pr_err("Intel CPU family 0x%x not supported\n", c->x86); + return NULL; + } + return µcode_intel_ops; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 47acaf319165..585be4bd71a5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -31,14 +31,6 @@ #include <asm/nmi.h> #include <asm/x86_init.h> -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - struct nmi_desc { spinlock_t lock; struct list_head head; @@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] = .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), .head = LIST_HEAD_INIT(nmi_desc[1].head), }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock), + .head = LIST_HEAD_INIT(nmi_desc[2].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock), + .head = LIST_HEAD_INIT(nmi_desc[3].head), + }, }; @@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, return handled; } -static int __setup_nmi(unsigned int type, struct nmiaction *action) +int __register_nmi_handler(unsigned int type, struct nmiaction *action) { struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; + if (!action->handler) + return -EINVAL; + spin_lock_irqsave(&desc->lock, flags); /* @@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) * to manage expectations */ WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); /* * some handlers need to be executed first otherwise a fake @@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) spin_unlock_irqrestore(&desc->lock, flags); return 0; } +EXPORT_SYMBOL(__register_nmi_handler); -static struct nmiaction *__free_nmi(unsigned int type, const char *name) +void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *n; @@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name) spin_unlock_irqrestore(&desc->lock, flags); synchronize_rcu(); - return (n); } - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_SERR, regs, false)) + return; + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", reason, smp_processor_id()); @@ -241,6 +202,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_IO_CHECK, regs, false)) + return; + pr_emerg( "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", reason, smp_processor_id()); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1d92a5ab6e8b..e8173154800d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -27,6 +27,15 @@ #include <asm/debugreg.h> #include <asm/nmi.h> +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data..cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; + #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -67,10 +76,9 @@ void free_thread_xstate(struct task_struct *tsk) fpu_free(&tsk->thread.fpu); } -void free_thread_info(struct thread_info *ti) +void arch_release_task_struct(struct task_struct *tsk) { - free_thread_xstate(ti->task); - free_pages((unsigned long)ti, THREAD_ORDER); + free_thread_xstate(tsk); } void arch_task_cache_init(void) @@ -516,26 +524,6 @@ void stop_this_cpu(void *dummy) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 733ca39f367e..43d8b48b23e6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -423,6 +423,7 @@ void set_personality_ia32(bool x32) current_thread_info()->status |= TS_COMPAT; } } +EXPORT_SYMBOL_GPL(set_personality_ia32); unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 685845cf16e0..13b1990c7c58 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - secure_computing(regs->orig_ax); + if (secure_computing(regs->orig_ax)) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1L; + goto out; + } if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; @@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs) regs->dx, regs->r10); #endif +out: return ret ?: regs->orig_ax; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 71f4727da373..5a98aa272184 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void) #endif rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_PAGE) { - const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + size_t atom_size; + /* + * On 64bit, use PMD_SIZE for atom_size so that embedded + * percpu areas are aligned to PMD. This, in the future, + * can also allow using PMD mappings in vmalloc area. Use + * PAGE_SIZE on 32bit as vmalloc space is highly contended + * and large vmalloc area allocs can easily fail. + */ +#ifdef CONFIG_X86_64 + atom_size = PMD_SIZE; +#else + atom_size = PAGE_SIZE; +#endif rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6e1e406038c2..3acaf51dfddb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -76,20 +76,8 @@ /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* Store all idle threads, this can be reused instead of creating -* a new thread. Also avoids complicated thread destroy functionality -* for idle threads. -*/ #ifdef CONFIG_HOTPLUG_CPU /* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. - */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) - -/* * We need this for trampoline_base protection from concurrent accesses when * off- and onlining cores wildly. */ @@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); void cpu_hotplug_driver_lock(void) { - mutex_lock(&x86_cpu_hotplug_driver_mutex); + mutex_lock(&x86_cpu_hotplug_driver_mutex); } void cpu_hotplug_driver_unlock(void) { - mutex_unlock(&x86_cpu_hotplug_driver_mutex); + mutex_unlock(&x86_cpu_hotplug_driver_mutex); } ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } -#else -static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif /* Number of siblings per CPU package */ @@ -618,22 +602,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) return (send_status | accept_status); } -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - /* reduce the number of lines printed when booting a large cpu count system */ static void __cpuinit announce_cpu(int cpu, int apicid) { @@ -660,58 +628,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid) * Returns zero if CPU booted OK, else error code from * ->wakeup_secondary_cpu. */ -static int __cpuinit do_boot_cpu(int apicid, int cpu) +static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { unsigned long boot_error = 0; unsigned long start_ip; int timeout; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - - INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); - c_idle.idle = get_idle_for_cpu(cpu); - - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. - */ - if (c_idle.idle) { - c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); - init_idle(c_idle.idle, cpu); - goto do_rest; - } + idle->thread.sp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + task_stack_page(idle))) - 1); + per_cpu(current_task, cpu) = idle; - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - - if (IS_ERR(c_idle.idle)) { - printk("failed fork for CPU %d\n", cpu); - destroy_work_on_stack(&c_idle.work); - return PTR_ERR(c_idle.idle); - } - - set_idle_for_cpu(cpu, c_idle.idle); -do_rest: - per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(c_idle.idle) - + (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; - stack_start = c_idle.idle->thread.sp; + stack_start = idle->thread.sp; /* start_ip had better be page-aligned! */ start_ip = trampoline_address(); @@ -813,12 +754,10 @@ do_rest: */ smpboot_restore_warm_reset_vector(); } - - destroy_work_on_stack(&c_idle.work); return boot_error; } -int __cpuinit native_cpu_up(unsigned int cpu) +int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; @@ -851,7 +790,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - err = do_boot_cpu(apicid, cpu); + err = do_boot_cpu(apicid, cpu, tidle); if (err) { pr_debug("do_boot_cpu failed %d\n", err); return -EIO; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index f386dc49f988..7515cf0e1805 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) current_thread_info()->sig_on_uaccess_error = 1; /* - * 0 is a valid user pointer (in the access_ok sense) on 32-bit and + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, 0 means "don't write anything" not "write it at + * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ ret = -EFAULT; @@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, - 0); + NULL); break; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e9f265fd79ae..9cf71d0b2d37 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -93,7 +93,6 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, - .fixup_cpu_id = x86_default_fixup_cpu_id, }; static void default_nmi_init(void) { }; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 173df38dbda5..2e88438ffd83 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); if (pmu->version == 1) { - pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; - return; + pmu->nr_arch_fixed_counters = 0; + } else { + pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), + X86_PMC_MAX_FIXED); + pmu->counter_bitmask[KVM_PMC_FIXED] = + ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; } - pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), - X86_PMC_MAX_FIXED); - pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; - pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) - | (((1ull << pmu->nr_arch_fixed_counters) - 1) - << X86_PMC_IDX_FIXED)); + pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | + (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); + pmu->global_ctrl_mask = ~pmu->global_ctrl; } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad85adfef843..4ff0ab9bc3c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); kvm_set_shared_msr(msr->index, msr->data, msr->mask); + preempt_enable(); + } break; } ret = kvm_set_msr_common(vcpu, msr_index, data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0bf7c1..185a2b823a2d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (npages && !old.rmap) { unsigned long userspace_addr; - down_write(¤t->mm->mmap_sem); - userspace_addr = do_mmap(NULL, 0, + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, map_flags, 0); - up_write(¤t->mm->mmap_sem); if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); @@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!user_alloc && !old.user_alloc && old.rmap && !npages) { int ret; - down_write(¤t->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, + ret = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); - up_write(¤t->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " @@ -6585,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, kvm_inject_page_fault(vcpu, &fault); } vcpu->arch.apf.halted = false; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 25feb1ae71c5..b1e6c4b2e8eb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -379,8 +379,8 @@ err_out: return; } -/* Decode moffset16/32/64 */ -static void __get_moffset(struct insn *insn) +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: @@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn) insn->moffset2.value = get_next(int, insn); insn->moffset2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->moffset1.got = insn->moffset2.got = 1; + return 1; + err_out: - return; + return 0; } -/* Decode imm v32(Iz) */ -static void __get_immv32(struct insn *insn) +/* Decode imm v32(Iz). Return 0 if failed */ +static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn) insn->immediate.value = get_next(int, insn); insn->immediate.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } + return 1; + err_out: - return; + return 0; } -/* Decode imm v64(Iv/Ov) */ -static void __get_immv(struct insn *insn) +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn) insn->immediate2.value = get_next(int, insn); insn->immediate2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /* Decode ptr16:16/32(Ap) */ -static void __get_immptr(struct insn *insn) +static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn) break; case 8: /* ptr16:64 is not exist (no segment) */ - return; + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate2.value = get_next(unsigned short, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /** @@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn) insn_get_displacement(insn); if (inat_has_moffset(insn->attr)) { - __get_moffset(insn); + if (!__get_moffset(insn)) + goto err_out; goto done; } @@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn) insn->immediate2.nbytes = 4; break; case INAT_IMM_PTR: - __get_immptr(insn); + if (!__get_immptr(insn)) + goto err_out; break; case INAT_IMM_VWORD32: - __get_immv32(insn); + if (!__get_immv32(insn)) + goto err_out; break; case INAT_IMM_VWORD: - __get_immv(insn); + if (!__get_immv(insn)) + goto err_out; break; default: - break; + /* Here, insn must have an immediate, but failed */ + goto err_out; } if (inat_has_second_immediate(insn->attr)) { insn->immediate2.value = get_next(char, insn); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb54483..2e4e4b02c37a 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include <linux/highmem.h> #include <linux/module.h> +#include <asm/word-at-a-time.h> + /* * best effort, GUP based copy_from_user() that is NMI-safe */ @@ -41,3 +43,100 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). + */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) +{ + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + while (max >= sizeof(unsigned long)) { + unsigned long c, mask; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + mask = has_zero(c); + if (mask) { + mask = (mask - 1) & ~mask; + mask >>= 7; + *(unsigned long *)(dst+res) = c & mask; + return res + count_masked_bytes(mask); + } + *(unsigned long *)(dst+res) = c; + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return res; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = current_thread_info()->addr_limit.seg; + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094ca7aaa..ef2a6a5d78e3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849ffb66..0d0326f388c0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,55 +9,6 @@ #include <asm/uaccess.h> /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ed2835e148b5..fc09c2754e08 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -9,11 +9,11 @@ struct pci_root_info { struct acpi_device *bridge; - char *name; + char name[16]; unsigned int res_num; struct resource *res; - struct list_head *resources; int busnum; + struct pci_sysdata sd; }; static bool pci_use_crs = true; @@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } -static bool resource_contains(struct resource *res, resource_size_t point) -{ - if (res->start <= point && point <= res->end) - return true; - return false; -} - static void coalesce_windows(struct pci_root_info *info, unsigned long type) { int i, j; @@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type) * our resources no longer match the ACPI _CRS, but * the kernel resource tree doesn't allow overlaps. */ - if (resource_contains(res1, res2->start) || - resource_contains(res1, res2->end) || - resource_contains(res2, res1->start) || - resource_contains(res2, res1->end)) { + if (resource_overlaps(res1, res2)) { res1->start = min(res1->start, res2->start); res1->end = max(res1->end, res2->end); dev_info(&info->bridge->dev, @@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type) } } -static void add_resources(struct pci_root_info *info) +static void add_resources(struct pci_root_info *info, + struct list_head *resources) { int i; struct resource *res, *root, *conflict; @@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info) "ignoring host bridge window %pR (conflicts with %s %pR)\n", res, conflict->name, conflict); else - pci_add_resource(info->resources, res); + pci_add_resource(resources, res); } } +static void free_pci_root_info_res(struct pci_root_info *info) +{ + kfree(info->res); + info->res = NULL; + info->res_num = 0; +} + +static void __release_pci_root_info(struct pci_root_info *info) +{ + int i; + struct resource *res; + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (!res->parent) + continue; + + if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + continue; + + release_resource(res); + } + + free_pci_root_info_res(info); + + kfree(info); +} +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ + struct pci_root_info *info = bridge->release_data; + + __release_pci_root_info(info); +} + static void -get_current_resources(struct acpi_device *device, int busnum, - int domain, struct list_head *resources) +probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, + int busnum, int domain) { - struct pci_root_info info; size_t size; - info.bridge = device; - info.res_num = 0; - info.resources = resources; + info->bridge = device; + info->res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, - &info); - if (!info.res_num) + info); + if (!info->res_num) return; - size = sizeof(*info.res) * info.res_num; - info.res = kmalloc(size, GFP_KERNEL); - if (!info.res) + size = sizeof(*info->res) * info->res_num; + info->res_num = 0; + info->res = kmalloc(size, GFP_KERNEL); + if (!info->res) return; - info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); - if (!info.name) - goto name_alloc_fail; + sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); - info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, - &info); - - if (pci_use_crs) { - add_resources(&info); - - return; - } - - kfree(info.name); - -name_alloc_fail: - kfree(info.res); + info); } struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; + struct pci_root_info *info = NULL; int domain = root->segment; int busnum = root->secondary.start; LIST_HEAD(resources); @@ -389,17 +401,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) if (node != -1 && !node_online(node)) node = -1; - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (out of memory)\n", domain, busnum); return NULL; } + sd = &info->sd; sd->domain = domain; sd->node = node; /* @@ -413,22 +422,32 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) * be replaced by sd. */ memcpy(bus->sysdata, sd, sizeof(*sd)); - kfree(sd); + kfree(info); } else { - get_current_resources(device, busnum, domain, &resources); + probe_pci_root_info(info, device, busnum, domain); /* * _CRS with no apertures is normal, so only fall back to * defaults or native bridge info if we're ignoring _CRS. */ - if (!pci_use_crs) + if (pci_use_crs) + add_resources(info, &resources); + else { + free_pci_root_info_res(info); x86_pci_root_bus_resources(busnum, &resources); + } + bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); - if (bus) + if (bus) { bus->subordinate = pci_scan_child_bus(bus); - else + pci_set_host_bridge_release( + to_pci_host_bridge(bus->bridge), + release_pci_root_info, info); + } else { pci_free_resource_list(&resources); + __release_pci_root_info(info); + } } /* After the PCI-E bus has been walked and all devices discovered, @@ -445,9 +464,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } } - if (!bus) - kfree(sd); - if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA if (pxm >= 0) diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 0567df3890e1..5aed49bff058 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -32,6 +32,27 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { #define RANGE_NUM 16 +static struct pci_root_info __init *find_pci_root_info(int node, int link) +{ + struct pci_root_info *info; + + /* find the position */ + list_for_each_entry(info, &pci_root_infos, list) + if (info->node == node && info->link == link) + return info; + + return NULL; +} + +static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node) +{ +#ifdef CONFIG_NUMA + int j; + + for (j = min_bus; j <= max_bus; j++) + set_mp_bus_to_node(j, node); +#endif +} /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus @@ -41,7 +62,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { static int __init early_fill_mp_bus_info(void) { int i; - int j; unsigned bus; unsigned slot; int node; @@ -50,7 +70,6 @@ static int __init early_fill_mp_bus_info(void) int def_link; struct pci_root_info *info; u32 reg; - struct resource *res; u64 start; u64 end; struct range range[RANGE_NUM]; @@ -86,7 +105,6 @@ static int __init early_fill_mp_bus_info(void) if (!found) return 0; - pci_root_num = 0; for (i = 0; i < 4; i++) { int min_bus; int max_bus; @@ -99,19 +117,11 @@ static int __init early_fill_mp_bus_info(void) min_bus = (reg >> 16) & 0xff; max_bus = (reg >> 24) & 0xff; node = (reg >> 4) & 0x07; -#ifdef CONFIG_NUMA - for (j = min_bus; j <= max_bus; j++) - set_mp_bus_to_node(j, node); -#endif + set_mp_bus_range_to_node(min_bus, max_bus, node); link = (reg >> 8) & 0x03; - info = &pci_root_info[pci_root_num]; - info->bus_min = min_bus; - info->bus_max = max_bus; - info->node = node; - info->link = link; + info = alloc_pci_root_info(min_bus, max_bus, node, link); sprintf(info->name, "PCI Bus #%02x", min_bus); - pci_root_num++; } /* get the default node and link for left over res */ @@ -134,16 +144,10 @@ static int __init early_fill_mp_bus_info(void) link = (reg >> 4) & 0x03; end = (reg & 0xfff000) | 0xfff; - /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == node && info->link == link) - break; - } - if (j == pci_root_num) + info = find_pci_root_info(node, link); + if (!info) continue; /* not found */ - info = &pci_root_info[j]; printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", node, link, start, end); @@ -155,13 +159,8 @@ static int __init early_fill_mp_bus_info(void) } /* add left over io port range to def node/link, [0, 0xffff] */ /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == def_node && info->link == def_link) - break; - } - if (j < pci_root_num) { - info = &pci_root_info[j]; + info = find_pci_root_info(def_node, def_link); + if (info) { for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; @@ -214,16 +213,10 @@ static int __init early_fill_mp_bus_info(void) end <<= 8; end |= 0xffff; - /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == node && info->link == link) - break; - } - if (j == pci_root_num) - continue; /* not found */ + info = find_pci_root_info(node, link); - info = &pci_root_info[j]; + if (!info) + continue; printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", node, link, start, end); @@ -291,14 +284,8 @@ static int __init early_fill_mp_bus_info(void) * add left over mmio range to def node/link ? * that is tricky, just record range in from start_min to 4G */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == def_node && info->link == def_link) - break; - } - if (j < pci_root_num) { - info = &pci_root_info[j]; - + info = find_pci_root_info(def_node, def_link); + if (info) { for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; @@ -309,20 +296,16 @@ static int __init early_fill_mp_bus_info(void) } } - for (i = 0; i < pci_root_num; i++) { - int res_num; + list_for_each_entry(info, &pci_root_infos, list) { int busnum; + struct pci_root_res *root_res; - info = &pci_root_info[i]; - res_num = info->res_num; busnum = info->bus_min; printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); - for (j = 0; j < res_num; j++) { - res = &info->res[j]; - printk(KERN_DEBUG "bus: %02x index %x %pR\n", - busnum, j, res); - } + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_DEBUG "bus: %02x %pR\n", + busnum, &root_res->res); } return 0; diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index f3a7c569a403..614392ced7d6 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -22,19 +22,15 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) { struct pci_root_info *info; + struct pci_root_res *root_res; struct resource res; u16 word1, word2; u8 fbus, lbus; - int i; - - info = &pci_root_info[pci_root_num]; - pci_root_num++; /* read the PCI bus numbers */ fbus = read_pci_config_byte(bus, slot, func, 0x44); lbus = read_pci_config_byte(bus, slot, func, 0x45); - info->bus_min = fbus; - info->bus_max = lbus; + info = alloc_pci_root_info(fbus, lbus, 0, 0); /* * Add the legacy IDE ports on bus 0 @@ -86,8 +82,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) res.flags = IORESOURCE_BUS; printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); - for (i = 0; i < info->res_num; i++) - printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_INFO "host bridge window %pR\n", &root_res->res); } static int __init broadcom_postcore_init(void) diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index fd3f65510e9d..306579f7d0fd 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -4,35 +4,38 @@ #include "bus_numa.h" -int pci_root_num; -struct pci_root_info pci_root_info[PCI_ROOT_NR]; +LIST_HEAD(pci_root_infos); -void x86_pci_root_bus_resources(int bus, struct list_head *resources) +static struct pci_root_info *x86_find_pci_root_info(int bus) { - int i; - int j; struct pci_root_info *info; - if (!pci_root_num) - goto default_resources; + if (list_empty(&pci_root_infos)) + return NULL; - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == bus) - break; - } + list_for_each_entry(info, &pci_root_infos, list) + if (info->bus_min == bus) + return info; + + return NULL; +} - if (i == pci_root_num) +void x86_pci_root_bus_resources(int bus, struct list_head *resources) +{ + struct pci_root_info *info = x86_find_pci_root_info(bus); + struct pci_root_res *root_res; + + if (!info) goto default_resources; printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", bus); - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { + list_for_each_entry(root_res, &info->resources, list) { struct resource *res; struct resource *root; - res = &info->res[j]; + res = &root_res->res; pci_add_resource(resources, res); if (res->flags & IORESOURCE_IO) root = &ioport_resource; @@ -53,11 +56,32 @@ default_resources: pci_add_resource(resources, &iomem_resource); } +struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link) +{ + struct pci_root_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return info; + + INIT_LIST_HEAD(&info->resources); + info->bus_min = bus_min; + info->bus_max = bus_max; + info->node = node; + info->link = link; + + list_add_tail(&info->list, &pci_root_infos); + + return info; +} + void __devinit update_res(struct pci_root_info *info, resource_size_t start, resource_size_t end, unsigned long flags, int merge) { - int i; struct resource *res; + struct pci_root_res *root_res; if (start > end) return; @@ -69,11 +93,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start, goto addit; /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { + list_for_each_entry(root_res, &info->resources, list) { resource_size_t final_start, final_end; resource_size_t common_start, common_end; - res = &info->res[i]; + res = &root_res->res; if (res->flags != flags) continue; @@ -93,14 +117,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start, addit: /* need to add that */ - if (info->res_num >= RES_NUM) + root_res = kzalloc(sizeof(*root_res), GFP_KERNEL); + if (!root_res) return; - res = &info->res[info->res_num]; + res = &root_res->res; res->name = info->name; res->flags = flags; res->start = start; res->end = end; - res->child = NULL; - info->res_num++; + + list_add_tail(&root_res->list, &info->resources); } diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 804a4b40c31a..226a466b2b2b 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -4,22 +4,24 @@ * sub bus (transparent) will use entres from 3 to store extra from * root, so need to make sure we have enough slot there. */ -#define RES_NUM 16 +struct pci_root_res { + struct list_head list; + struct resource res; +}; + struct pci_root_info { + struct list_head list; char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; + struct list_head resources; int bus_min; int bus_max; int node; int link; }; -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -extern int pci_root_num; -extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; - +extern struct list_head pci_root_infos; +struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link); extern void update_res(struct pci_root_info *info, resource_size_t start, resource_size_t end, unsigned long flags, int merge); #endif diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 323481e06ef8..0ad990a20d4a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -11,6 +11,7 @@ #include <linux/dmi.h> #include <linux/slab.h> +#include <asm-generic/pci-bridge.h> #include <asm/acpi.h> #include <asm/segment.h> #include <asm/io.h> @@ -229,6 +230,14 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d) } #endif +static int __devinit set_scan_all(const struct dmi_system_id *d) +{ + printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n", + d->ident); + pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); + return 0; +} + static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { #ifdef __i386__ /* @@ -420,6 +429,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), }, }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ftServer"), + }, + }, {} }; @@ -430,9 +446,7 @@ void __init dmi_check_pciprobe(void) struct pci_bus * __devinit pcibios_scan_root(int busnum) { - LIST_HEAD(resources); struct pci_bus *bus = NULL; - struct pci_sysdata *sd; while ((bus = pci_find_next_bus(bus)) != NULL) { if (bus->number == busnum) { @@ -441,28 +455,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) } } - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); - return NULL; - } - - sd->node = get_mp_bus_to_node(busnum); - - printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); - x86_pci_root_bus_resources(busnum, &resources); - bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); - if (!bus) { - pci_free_resource_list(&resources); - kfree(sd); - } - - return bus; + return pci_scan_bus_on_node(busnum, &pci_root_ops, + get_mp_bus_to_node(busnum)); } + void __init pcibios_set_cache_line_size(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -656,6 +652,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, } sd->node = node; x86_pci_root_bus_resources(busno, &resources); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno); bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); if (!bus) { pci_free_resource_list(&resources); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 831971e731f7..dd8ca6f7223b 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -57,7 +57,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) { struct pcibios_fwaddrmap *map; - WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); + WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); list_for_each_entry(map, &pcibios_fwaddrmappings, list) if (map->dev == dev) diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 66d377e334f7..646e3b5b4bb6 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -63,7 +63,7 @@ static struct gpio_led net5501_leds[] = { .name = "net5501:1", .gpio = 6, .default_trigger = "default-on", - .active_low = 1, + .active_low = 0, }, }; diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e0a37233c0af..e31bcd8f2eee 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -805,7 +805,7 @@ void intel_scu_devices_create(void) } else i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); } - intel_scu_notifier_post(SCU_AVAILABLE, 0L); + intel_scu_notifier_post(SCU_AVAILABLE, NULL); } EXPORT_SYMBOL_GPL(intel_scu_devices_create); @@ -814,7 +814,7 @@ void intel_scu_devices_destroy(void) { int i; - intel_scu_notifier_post(SCU_DOWN, 0L); + intel_scu_notifier_post(SCU_DOWN, NULL); for (i = 0; i < ipc_next_dev; i++) platform_device_del(ipc_devs[i]); diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore new file mode 100644 index 000000000000..be0ed065249b --- /dev/null +++ b/arch/x86/tools/.gitignore @@ -0,0 +1 @@ +relocs diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index d511aa97533a..733057b435b0 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs-y += relocs +relocs: $(obj)/relocs diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/tools/relocs.c index d3c0b0277666..b43cfcd9bf40 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/tools/relocs.c @@ -18,6 +18,8 @@ static void die(char *fmt, ...); static Elf32_Ehdr ehdr; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; +static unsigned long reloc16_count, reloc16_idx; +static unsigned long *relocs16; struct section { Elf32_Shdr shdr; @@ -28,52 +30,86 @@ struct section { }; static struct section *secs; +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +static const char * const sym_regex_kernel[S_NSYMTYPES] = { /* * Following symbols have been audited. There values are constant and do * not change if bzImage is loaded at a different physical address than * the address for which it has been compiled. Don't warn user about * absolute relocations present w.r.t these symbols. */ -static const char abs_sym_regex[] = + [S_ABS] = "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" - "__crc_)"; -static regex_t abs_sym_regex_c; -static int is_abs_reloc(const char *sym_name) -{ - return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); -} + "__crc_)", /* * These symbols are known to be relative, even if the linker marks them * as absolute (typically defined outside any section in the linker script.) */ -static const char rel_sym_regex[] = - "^_end$"; -static regex_t rel_sym_regex_c; -static int is_rel_reloc(const char *sym_name) + [S_REL] = + "^(__init_(begin|end)|" + "__x86_cpu_dev_(start|end)|" + "(__parainstructions|__alt_instructions)(|_end)|" + "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "_end)$" +}; + + +static const char * const sym_regex_realmode[S_NSYMTYPES] = { +/* + * These are 16-bit segment symbols when compiling 16-bit code. + */ + [S_SEG] = + "^real_mode_seg$", + +/* + * These are offsets belonging to segments, as opposed to linear addresses, + * when compiling 16-bit code. + */ + [S_LIN] = + "^pa_", +}; + +static const char * const *sym_regex; + +static regex_t sym_regex_c[S_NSYMTYPES]; +static int is_reloc(enum symtype type, const char *sym_name) { - return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); + return sym_regex[type] && + !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); } -static void regex_init(void) +static void regex_init(int use_real_mode) { char errbuf[128]; int err; - - err = regcomp(&abs_sym_regex_c, abs_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); - } + int i; + + if (use_real_mode) + sym_regex = sym_regex_realmode; + else + sym_regex = sym_regex_kernel; - err = regcomp(&rel_sym_regex_c, rel_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); + for (i = 0; i < S_NSYMTYPES; i++) { + if (!sym_regex[i]) + continue; + + err = regcomp(&sym_regex_c[i], sym_regex[i], + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); + die("%s", errbuf); + } } } @@ -154,6 +190,10 @@ static const char *rel_type(unsigned type) REL_TYPE(R_386_RELATIVE), REL_TYPE(R_386_GOTOFF), REL_TYPE(R_386_GOTPC), + REL_TYPE(R_386_8), + REL_TYPE(R_386_PC8), + REL_TYPE(R_386_16), + REL_TYPE(R_386_PC16), #undef REL_TYPE }; const char *name = "unknown type rel type name"; @@ -189,7 +229,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) name = sym_strtab + sym->st_name; } else { - name = sec_name(secs[sym->st_shndx].shdr.sh_name); + name = sec_name(sym->st_shndx); } return name; } @@ -403,13 +443,11 @@ static void print_absolute_symbols(void) for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; - Elf32_Sym *sh_symtab; int j; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - sh_symtab = sec->symtab; sym_strtab = sec->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { Elf32_Sym *sym; @@ -474,7 +512,7 @@ static void print_absolute_relocs(void) * Before warning check if this absolute symbol * relocation is harmless. */ - if (is_abs_reloc(name) || is_rel_reloc(name)) + if (is_reloc(S_ABS, name) || is_reloc(S_REL, name)) continue; if (!printed) { @@ -498,7 +536,8 @@ static void print_absolute_relocs(void) printf("\n"); } -static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), + int use_real_mode) { int i; /* Walk through the relocations */ @@ -523,30 +562,67 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) Elf32_Rel *rel; Elf32_Sym *sym; unsigned r_type; + const char *symname; + int shn_abs; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); - /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS && - !is_rel_reloc(sym_name(sym_strtab, sym))) { - continue; - } + + shn_abs = sym->st_shndx == SHN_ABS; + switch (r_type) { case R_386_NONE: case R_386_PC32: + case R_386_PC16: + case R_386_PC8: /* * NONE can be ignored and and PC relative * relocations don't need to be adjusted. */ break; + + case R_386_16: + symname = sym_name(sym_strtab, sym); + if (!use_real_mode) + goto bad; + if (shn_abs) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_SEG, symname)) + goto bad; + } else { + if (is_reloc(S_LIN, symname)) + goto bad; + else + break; + } + visit(rel, sym); + break; + case R_386_32: - /* Visit relocations that need to be adjusted */ + symname = sym_name(sym_strtab, sym); + if (shn_abs) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_REL, symname)) + goto bad; + } else { + if (use_real_mode && + !is_reloc(S_LIN, symname)) + break; + } visit(rel, sym); break; default: die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); break; + bad: + symname = sym_name(sym_strtab, sym); + die("Invalid %s %s relocation: %s\n", + shn_abs ? "absolute" : "relative", + rel_type(r_type), symname); } } } @@ -554,13 +630,19 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) { - reloc_count += 1; + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + reloc16_count++; + else + reloc_count++; } static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) { /* Remember the address that needs to be adjusted. */ - relocs[reloc_idx++] = rel->r_offset; + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + relocs16[reloc16_idx++] = rel->r_offset; + else + relocs[reloc_idx++] = rel->r_offset; } static int cmp_relocs(const void *va, const void *vb) @@ -570,23 +652,41 @@ static int cmp_relocs(const void *va, const void *vb) return (*a == *b)? 0 : (*a > *b)? 1 : -1; } -static void emit_relocs(int as_text) +static int write32(unsigned int v, FILE *f) +{ + unsigned char buf[4]; + + put_unaligned_le32(v, buf); + return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; +} + +static void emit_relocs(int as_text, int use_real_mode) { int i; /* Count how many relocations I have and allocate space for them. */ reloc_count = 0; - walk_relocs(count_reloc); + walk_relocs(count_reloc, use_real_mode); relocs = malloc(reloc_count * sizeof(relocs[0])); if (!relocs) { die("malloc of %d entries for relocs failed\n", reloc_count); } + + relocs16 = malloc(reloc16_count * sizeof(relocs[0])); + if (!relocs16) { + die("malloc of %d entries for relocs16 failed\n", + reloc16_count); + } /* Collect up the relocations */ reloc_idx = 0; - walk_relocs(collect_reloc); + walk_relocs(collect_reloc, use_real_mode); + + if (reloc16_count && !use_real_mode) + die("Segment relocations found but --realmode not specified\n"); /* Order the relocations for more efficient processing */ qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs); /* Print the relocations */ if (as_text) { @@ -595,58 +695,83 @@ static void emit_relocs(int as_text) */ printf(".section \".data.reloc\",\"a\"\n"); printf(".balign 4\n"); - for (i = 0; i < reloc_count; i++) { - printf("\t .long 0x%08lx\n", relocs[i]); + if (use_real_mode) { + printf("\t.long %lu\n", reloc16_count); + for (i = 0; i < reloc16_count; i++) + printf("\t.long 0x%08lx\n", relocs16[i]); + printf("\t.long %lu\n", reloc_count); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } else { + /* Print a stop */ + printf("\t.long 0x%08lx\n", (unsigned long)0); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } } + printf("\n"); } else { - unsigned char buf[4]; - /* Print a stop */ - fwrite("\0\0\0\0", 4, 1, stdout); - /* Now print each relocation */ - for (i = 0; i < reloc_count; i++) { - put_unaligned_le32(relocs[i], buf); - fwrite(buf, 4, 1, stdout); + if (use_real_mode) { + write32(reloc16_count, stdout); + for (i = 0; i < reloc16_count; i++) + write32(relocs16[i], stdout); + write32(reloc_count, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) + write32(relocs[i], stdout); + } else { + /* Print a stop */ + write32(0, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) { + write32(relocs[i], stdout); + } } } } static void usage(void) { - die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); + die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); } int main(int argc, char **argv) { int show_absolute_syms, show_absolute_relocs; - int as_text; + int as_text, use_real_mode; const char *fname; FILE *fp; int i; - regex_init(); - show_absolute_syms = 0; show_absolute_relocs = 0; as_text = 0; + use_real_mode = 0; fname = NULL; for (i = 1; i < argc; i++) { char *arg = argv[i]; if (*arg == '-') { - if (strcmp(argv[1], "--abs-syms") == 0) { + if (strcmp(arg, "--abs-syms") == 0) { show_absolute_syms = 1; continue; } - - if (strcmp(argv[1], "--abs-relocs") == 0) { + if (strcmp(arg, "--abs-relocs") == 0) { show_absolute_relocs = 1; continue; } - else if (strcmp(argv[1], "--text") == 0) { + if (strcmp(arg, "--text") == 0) { as_text = 1; continue; } + if (strcmp(arg, "--realmode") == 0) { + use_real_mode = 1; + continue; + } } else if (!fname) { fname = arg; @@ -657,6 +782,7 @@ int main(int argc, char **argv) if (!fname) { usage(); } + regex_init(use_real_mode); fp = fopen(fname, "r"); if (!fp) { die("Cannot open %s: %s\n", @@ -675,6 +801,6 @@ int main(int argc, char **argv) print_absolute_relocs(); return 0; } - emit_relocs(as_text); + emit_relocs(as_text, use_real_mode); return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4f51bebac02c..95dccce8e979 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -63,6 +63,7 @@ #include <asm/stackprotector.h> #include <asm/hypervisor.h> #include <asm/mwait.h> +#include <asm/pci_x86.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -261,7 +262,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#ifdef CONFIG_ACPI +#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ + !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -349,7 +351,6 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - if (xen_check_mwait()) cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } @@ -809,9 +810,40 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC +static unsigned long xen_set_apic_id(unsigned int x) +{ + WARN_ON(1); + return x; +} +static unsigned int xen_get_apic_id(unsigned long x) +{ + return ((x)>>24) & 0xFFu; +} static u32 xen_apic_read(u32 reg) { - return 0; + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = 0, + }; + int ret = 0; + + /* Shouldn't need this as APIC is turned off for PV, and we only + * get called on the bootup processor. But just in case. */ + if (!xen_initial_domain() || smp_processor_id()) + return 0; + + if (reg == APIC_LVR) + return 0x10; + + if (reg != APIC_ID) + return 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return 0; + + return op.u.pcpu_info.apic_id << 24; } static void xen_apic_write(u32 reg, u32 val) @@ -849,6 +881,8 @@ static void set_xen_basic_apic_ops(void) apic->icr_write = xen_apic_icr_write; apic->wait_icr_idle = xen_apic_wait_icr_idle; apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; + apic->set_apic_id = xen_set_apic_id; + apic->get_apic_id = xen_get_apic_id; } #endif @@ -1365,8 +1399,10 @@ asmlinkage void __init xen_start_kernel(void) /* Make sure ACS will be enabled */ pci_request_acs(); } - - +#ifdef CONFIG_PCI + /* PCI BIOS service won't work from a PV guest. */ + pci_probe &= ~PCI_PROBE_BIOS; +#endif xen_raw_console_write("about to get started...\n"); xen_setup_runstate_info(0); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b8e279479a6b..69f5857660ac 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -353,8 +353,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = mfn_to_pfn(mfn); + pteval_t flags = val & PTE_FLAGS_MASK; - val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + if (unlikely(pfn == ~0)) + val = flags & ~_PAGE_PRESENT; + else + val = ((pteval_t)pfn << PAGE_SHIFT) | flags; } return val; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 5fac6919b957..3700945ed0d5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void) static void __init xen_filter_cpu_maps(void) { int i, rc; + unsigned int subtract = 0; if (!xen_initial_domain()) return; @@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void) } else { set_cpu_possible(i, false); set_cpu_present(i, false); + subtract++; } } +#ifdef CONFIG_HOTPLUG_CPU + /* This is akin to using 'nr_cpus' on the Linux command line. + * Which is OK as when we use 'dom0_max_vcpus=X' we can only + * have up to X, while nr_cpu_ids is greater than X. This + * normally is not a problem, except when CPU hotplugging + * is involved and then there might be more than X CPUs + * in the guest - which will not work as there is no + * hypercall to expand the max number of VCPUs an already + * running guest has. So cap it up to X. */ + if (subtract) + nr_cpu_ids = nr_cpu_ids - subtract; +#endif + } static void __init xen_smp_prepare_boot_cpu(void) @@ -250,18 +265,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) set_cpu_possible(cpu, false); } - for_each_possible_cpu (cpu) { - struct task_struct *idle; - - if (cpu == 0) - continue; - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); - + for_each_possible_cpu(cpu) set_cpu_present(cpu, true); - } } static int __cpuinit @@ -331,9 +336,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -static int __cpuinit xen_cpu_up(unsigned int cpu) +static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) { - struct task_struct *idle = idle_task(cpu); int rc; per_cpu(current_task, cpu) = idle; @@ -547,10 +551,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) xen_init_lock_cpu(0); } -static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) +static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc; - rc = native_cpu_up(cpu); + rc = native_cpu_up(cpu, tidle); WARN_ON (xen_smp_intr_init(cpu)); return rc; } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 79d7362ad6d1..3e45aa000718 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f + jnz 1f 2: call check_events 1: ENDPATCH(xen_restore_fl_direct) |