summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/crypto/fpu.c1
-rw-r--r--arch/x86/ia32/ia32_aout.c1
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/ia32/sys_ia32.c1
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h3
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/k8.h5
-rw-r--r--arch/x86/include/asm/lguest_hcall.h29
-rw-r--r--arch/x86/include/asm/mce.h8
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/alternative.c1
-rw-r--r--arch/x86/kernel/amd_iommu.c22
-rw-r--r--arch/x86/kernel/amd_iommu_init.c50
-rw-r--r--arch/x86/kernel/apb_timer.c3
-rw-r--r--arch/x86/kernel/aperture_64.c15
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/es7000_32.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/nmi.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/bootflag.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c21
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c138
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h23
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c55
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c80
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/crash_dump_32.c1
-rw-r--r--arch/x86/kernel/dumpstack.h13
-rw-r--r--arch/x86/kernel/e820.c24
-rw-r--r--arch/x86/kernel/hpet.c10
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irqinit.c1
-rw-r--r--arch/x86/kernel/k8.c2
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kprobes.c27
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/mca_32.c1
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/mpparse.c4
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/pci-dma.c1
-rw-r--r--arch/x86/kernel/pci-gart_64.c4
-rw-r--r--arch/x86/kernel/pci-nommu.c1
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c1
-rw-r--r--arch/x86/kernel/setup.c15
-rw-r--r--arch/x86/kernel/smp.c1
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/tlb_uv.c1
-rw-r--r--arch/x86/kernel/uv_irq.c1
-rw-r--r--arch/x86/kernel/uv_time.c1
-rw-r--r--arch/x86/kernel/vmi_32.c1
-rw-r--r--arch/x86/kvm/i8254.c1
-rw-r--r--arch/x86/kvm/i8259.c1
-rw-r--r--arch/x86/kvm/lapic.c1
-rw-r--r--arch/x86/kvm/mmu.c12
-rw-r--r--arch/x86/kvm/svm.c34
-rw-r--r--arch/x86/kvm/vmx.c28
-rw-r--r--arch/x86/kvm/x86.c53
-rw-r--r--arch/x86/lguest/boot.c61
-rw-r--r--arch/x86/lguest/i386_head.S2
-rw-r--r--arch/x86/lib/rwsem_64.S2
-rw-r--r--arch/x86/mm/hugetlbpage.c1
-rw-r--r--arch/x86/mm/init.c1
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c1
-rw-r--r--arch/x86/mm/ioremap.c14
-rw-r--r--arch/x86/mm/kmmio.c1
-rw-r--r--arch/x86/mm/mmio-mod.c1
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/pat.c2
-rw-r--r--arch/x86/mm/pgtable.c1
-rw-r--r--arch/x86/mm/pgtable_32.c3
-rw-r--r--arch/x86/mm/srat_64.c3
-rw-r--r--arch/x86/pci/acpi.c85
-rw-r--r--arch/x86/pci/common.c1
-rw-r--r--arch/x86/pci/i386.c3
-rw-r--r--arch/x86/pci/irq.c1
-rw-r--r--arch/x86/pci/mmconfig-shared.c1
-rw-r--r--arch/x86/pci/mrst.c4
-rw-r--r--arch/x86/pci/pcbios.c1
-rw-r--r--arch/x86/power/hibernate_32.c1
-rw-r--r--arch/x86/power/hibernate_64.c1
-rw-r--r--arch/x86/power/hibernate_asm_32.S15
-rw-r--r--arch/x86/vdso/vma.c1
-rw-r--r--arch/x86/xen/debugfs.c1
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/smp.c1
-rw-r--r--arch/x86/xen/spinlock.c1
-rw-r--r--arch/x86/xen/time.c1
121 files changed, 778 insertions, 299 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0eacb1ffb42..9458685902b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1216,8 +1216,8 @@ config NUMA_EMU
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
- range 1 9
- default "9" if MAXSMP
+ range 1 10
+ default "10" if MAXSMP
default "6" if X86_64
default "4" if X86_NUMAQ
default "3"
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index daef6cd2b45..1a8f8649c03 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/i387.h>
struct crypto_fpu_ctx {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 280c019cfad..0350311906a 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -21,7 +21,6 @@
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/user.h>
-#include <linux/slab.h>
#include <linux/binfmts.h>
#include <linux/personality.h>
#include <linux/init.h>
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 59b4556a5b9..e790bc1fbfa 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -626,7 +626,7 @@ ia32_sys_call_table:
.quad stub32_sigreturn
.quad stub32_clone /* 120 */
.quad sys_setdomainname
- .quad sys_uname
+ .quad sys_newuname
.quad sys_modify_ldt
.quad compat_sys_adjtimex
.quad sys32_mprotect /* 125 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 74c35431b7d..626be156d88 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -40,6 +40,7 @@
#include <linux/ptrace.h>
#include <linux/highuid.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <asm/uaccess.h>
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ba19ad4c47d..86a0ff0aeac 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -21,6 +21,7 @@
#define _ASM_X86_AMD_IOMMU_TYPES_H
#include <linux/types.h>
+#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -140,6 +141,7 @@
/* constants to configure the command buffer */
#define CMD_BUFFER_SIZE 8192
+#define CMD_BUFFER_UNINITIALIZED 1
#define CMD_BUFFER_ENTRIES 512
#define MMIO_CMD_SIZE_SHIFT 56
#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
@@ -237,6 +239,7 @@ struct protection_domain {
struct list_head list; /* for list of all protection domains */
struct list_head dev_list; /* List of all devices in this domain */
spinlock_t lock; /* mostly used to lock the page table*/
+ struct mutex api_lock; /* protect page tables in the iommu-api path */
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index a1dcfa3ab17..30a3e977612 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -347,6 +347,7 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
extern void __iomem *early_memremap(resource_size_t phys_addr,
unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void fixup_early_ioremap(void);
#define IO_SPACE_LIMIT 0xffff
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index f70e60071fe..af00bd1d208 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);
#ifdef CONFIG_K8_NB
+extern int num_k8_northbridges;
+
static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
}
+
#else
+#define num_k8_northbridges 0
+
static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
return NULL;
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ba0eed8aa1a..b60f2924c41 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -28,22 +28,39 @@
#ifndef __ASSEMBLY__
#include <asm/hw_irq.h>
-#include <asm/kvm_para.h>
/*G:030
* But first, how does our Guest contact the Host to ask for privileged
* operations? There are two ways: the direct way is to make a "hypercall",
* to make requests of the Host Itself.
*
- * We use the KVM hypercall mechanism, though completely different hypercall
- * numbers. Seventeen hypercalls are available: the hypercall number is put in
- * the %eax register, and the arguments (when required) are placed in %ebx,
- * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax.
+ * Our hypercall mechanism uses the highest unused trap code (traps 32 and
+ * above are used by real hardware interrupts). Seventeen hypercalls are
+ * available: the hypercall number is put in the %eax register, and the
+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
+ * If a return value makes sense, it's returned in %eax.
*
* Grossly invalid calls result in Sudden Death at the hands of the vengeful
* Host, rather than returning failure. This reflects Winston Churchill's
* definition of a gentleman: "someone who is only rude intentionally".
-:*/
+ */
+static inline unsigned long
+hcall(unsigned long call,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4)
+{
+ /* "int" is the Intel instruction to trigger a trap. */
+ asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
+ /* The call in %eax (aka "a") might be overwritten */
+ : "=a"(call)
+ /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
+ : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
+ /* "memory" means this might write somewhere in memory.
+ * This isn't true for all calls, but it's safe to tell
+ * gcc that it might happen so it doesn't get clever. */
+ : "memory");
+ return call;
+}
/* Can't use our min() macro here: needs to be a constant */
#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6c3fdd631ed..f32a4301c4d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void);
static inline void mcheck_intel_therm_init(void) { }
#endif
+/*
+ * Used by APEI to report memory error via /dev/mcelog
+ */
+
+struct cper_sec_mem_err;
+extern void apei_mce_report_mem_error(int corrected,
+ struct cper_sec_mem_err *mem_err);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 47339a1ac7b..2984a25ff38 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -19,7 +19,6 @@
#include <asm/paravirt.h>
#include <linux/bitops.h>
-#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0061ea26306..cd40aba6aa9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,6 +31,7 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
+#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3a4bf35c179..1a160d5d44d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -8,6 +8,7 @@
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
+#include <linux/slab.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index adb0ba02570..f854d89b7ed 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,8 +18,8 @@
*/
#include <linux/pci.h>
-#include <linux/gfp.h>
#include <linux/bitmap.h>
+#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
@@ -118,7 +118,7 @@ static bool check_device(struct device *dev)
return false;
/* No device or no PCI device */
- if (!dev || dev->bus != &pci_bus_type)
+ if (dev->bus != &pci_bus_type)
return false;
devid = get_device_id(dev);
@@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
u32 tail, head;
u8 *target;
+ WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
target = iommu->cmd_buf + tail;
memcpy_toio(target, cmd, sizeof(*cmd));
@@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void)
struct dma_ops_domain *dma_dom;
u16 devid;
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ for_each_pci_dev(dev) {
/* Do we handle this device? */
if (!check_device(&dev->dev))
@@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain)
list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
struct device *dev = dev_data->dev;
- do_detach(dev);
+ __detach_device(dev);
atomic_set(&dev_data->bind, 0);
}
@@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void)
return NULL;
spin_lock_init(&domain->lock);
+ mutex_init(&domain->api_lock);
domain->id = domain_id_alloc();
if (!domain->id)
goto out_err;
@@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
free_pagetable(domain);
- domain_id_free(domain->id);
-
- kfree(domain);
+ protection_domain_free(domain);
dom->priv = NULL;
}
@@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
iova &= PAGE_MASK;
paddr &= PAGE_MASK;
+ mutex_lock(&domain->api_lock);
+
for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
if (ret)
@@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
paddr += PAGE_SIZE;
}
+ mutex_unlock(&domain->api_lock);
+
return 0;
}
@@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
iova &= PAGE_MASK;
+ mutex_lock(&domain->api_lock);
+
for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova, PM_MAP_4k);
iova += PAGE_SIZE;
}
iommu_flush_tlb_pde(domain);
+
+ mutex_unlock(&domain->api_lock);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9dc91b43147..6360abf993d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -19,8 +19,8 @@
#include <linux/pci.h>
#include <linux/acpi.h>
-#include <linux/gfp.h>
#include <linux/list.h>
+#include <linux/slab.h>
#include <linux/sysdev.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
@@ -138,9 +138,9 @@ int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
/*
- * Set to true if ACPI table parsing and hardware intialization went properly
+ * The ACPI table parsing functions set this variable on an error
*/
-static bool amd_iommu_initialized;
+static int __initdata amd_iommu_init_err;
/*
* List of protection domains - used during resume
@@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
*/
for (i = 0; i < table->length; ++i)
checksum += p[i];
- if (checksum != 0)
+ if (checksum != 0) {
/* ACPI table corrupt */
- return -ENODEV;
+ amd_iommu_init_err = -ENODEV;
+ return 0;
+ }
p += IVRS_HEADER_LENGTH;
@@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
if (cmd_buf == NULL)
return NULL;
- iommu->cmd_buf_size = CMD_BUFFER_SIZE;
+ iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
return cmd_buf;
}
@@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
&entry, sizeof(entry));
amd_iommu_reset_cmd_buffer(iommu);
+ iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
}
static void __init free_command_buffer(struct amd_iommu *iommu)
{
free_pages((unsigned long)iommu->cmd_buf,
- get_order(iommu->cmd_buf_size));
+ get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
}
/* allocates the memory where the IOMMU will log its events to */
@@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
h->mmio_phys);
iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
- if (iommu == NULL)
- return -ENOMEM;
+ if (iommu == NULL) {
+ amd_iommu_init_err = -ENOMEM;
+ return 0;
+ }
+
ret = init_iommu_one(iommu, h);
- if (ret)
- return ret;
+ if (ret) {
+ amd_iommu_init_err = ret;
+ return 0;
+ }
break;
default:
break;
@@ -934,8 +942,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
}
WARN_ON(p != end);
- amd_iommu_initialized = true;
-
return 0;
}
@@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
return -ENODEV;
+ ret = amd_iommu_init_err;
+ if (ret)
+ goto out;
+
dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
@@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", init_iommu_all) != 0)
goto free;
- if (!amd_iommu_initialized)
+ if (amd_iommu_init_err) {
+ ret = amd_iommu_init_err;
goto free;
+ }
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
+ if (amd_iommu_init_err) {
+ ret = amd_iommu_init_err;
+ goto free;
+ }
+
ret = sysdev_class_register(&amd_iommu_sysdev_class);
if (ret)
goto free;
@@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void)
if (ret)
goto free;
+ enable_iommus();
+
if (iommu_pass_through)
ret = amd_iommu_init_passthrough();
else
@@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void)
amd_iommu_init_notifier();
- enable_iommus();
-
if (iommu_pass_through)
goto out;
@@ -1315,6 +1332,7 @@ out:
return ret;
free:
+ disable_iommus();
amd_iommu_uninit_devices();
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 4b7099526d2..a35347501d3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/sysdev.h>
+#include <linux/slab.h>
#include <linux/pm.h>
#include <linux/pci.h>
#include <linux/sfi.h>
@@ -428,7 +429,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
static __init int apbt_late_init(void)
{
- if (disable_apbt_percpu)
+ if (disable_apbt_percpu || !apb_timer_block_enabled)
return 0;
/* This notifier should be called after workqueue is ready */
hotcpu_notifier(apbt_cpuhp_notify, -20);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3704997e8b2..b5d8b0bcf23 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void)
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;
+ u32 ctl;
bus = bus_dev_ranges[i].bus;
dev_base = bus_dev_ranges[i].dev_base;
@@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void)
gart_iommu_aperture = 1;
x86_init.iommu.iommu_init = gart_iommu_init;
- aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
+ ctl = read_pci_config(bus, slot, 3,
+ AMD64_GARTAPERTURECTL);
+
+ /*
+ * Before we do anything else disable the GART. It may
+ * still be enabled if we boot into a crash-kernel here.
+ * Reconfiguring the GART while it is enabled could have
+ * unknown side-effects.
+ */
+ ctl &= ~GARTEN;
+ write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
+
+ aper_order = (ctl >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
aper_base <<= 25;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 00187f1fcfb..e5a4a1e0161 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1640,8 +1640,10 @@ int __init APIC_init_uniprocessor(void)
}
#endif
+#ifndef CONFIG_SMP
enable_IR_x2apic();
default_setup_apic_routing();
+#endif
verify_local_APIC();
connect_bsp_APIC();
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index dd2b5f26464..03ba1b895f5 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -42,6 +42,7 @@
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/init.h>
+#include <linux/gfp.h>
#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/io.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 463de9a858a..eb2789c3f72 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -36,6 +36,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h> /* time_after() */
+#include <linux/slab.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
@@ -2544,6 +2545,9 @@ void irq_force_complete_move(int irq)
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg = desc->chip_data;
+ if (!cfg)
+ return;
+
__irq_complete_move(&desc, cfg->vector);
}
#else
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 8aa65adbd25..1edaf15c0b8 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 49dbeaef2a2..c085d52dbaf 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -17,6 +17,7 @@
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/timer.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/io.h>
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 30f25a75fe2..5de7f4c5697 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -5,7 +5,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
-#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/acpi.h>
#include <asm/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1b1920fa7c8..459168083b7 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
+#include <linux/slab.h>
#include <trace/events/power.h>
#include <linux/acpi.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5..c587db472a7 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/cpufreq.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d..16e3483be9e 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@
#include <linux/cpufreq.h>
#include <linux/pci.h>
#include <linux/errno.h>
+#include <linux/slab.h>
#include <asm/processor-cyrix.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb76..e7b559d74c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/cpufreq.h>
#include <linux/timex.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 86961519372..7b8a8ba67b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpufreq.h>
-#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/timex.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index ff36d2979a9..ce7cde713e7 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -30,6 +30,7 @@
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
+#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index cb01dac267d..b3379d6a5c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -13,7 +13,6 @@
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/ioport.h>
-#include <linux/slab.h>
#include <linux/timex.h>
#include <linux/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d360b56e982..b6215b9798e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
powernow_table[i].index = index;
/* Frequency may be rounded for these */
- if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+ if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+ || boot_cpu_data.x86 == 0x11) {
powernow_table[i].frequency =
freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
} else
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8d672ef162c..9b1ff37de46 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -20,6 +20,7 @@
#include <linux/sched.h> /* current */
#include <linux/delay.h>
#include <linux/compiler.h>
+#include <linux/gfp.h>
#include <asm/msr.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2ce8e0b5cc5..561758e9518 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -23,7 +23,6 @@
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/pci.h>
-#include <linux/slab.h>
#include <linux/sched.h>
#include "speedstep-lib.h"
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index ad0083abfa2..a94ec6be69f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -13,7 +13,6 @@
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
-#include <linux/slab.h>
#include <asm/msr.h>
#include <asm/tsc.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 04d73c114e4..8abd869baab 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -17,7 +17,6 @@
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
-#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <asm/ist.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af3..1366c7cfd48 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ /*
+ * Atom erratum AAE44/AAF40/AAG38/AAH41:
+ *
+ * A race condition between speculative fetches and invalidating
+ * a large page. This is worked around in microcode, but we
+ * need the microcode to have already been loaded... so if it is
+ * not, recommend a BIOS update and disable large pages.
+ */
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
+ u32 ucode, junk;
+
+ wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ sync_core();
+ rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
+
+ if (ucode < 0x20e) {
+ printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+ clear_cpu_cap(c, X86_FEATURE_PSE);
+ }
+ }
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b3eeb66c0a5..95962a93f99 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -340,6 +340,10 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
(boot_cpu_data.x86_mask < 0x1)))
return;
+ /* not in virtualized environments */
+ if (num_k8_northbridges == 0)
+ return;
+
this_leaf->can_disable = true;
this_leaf->l3_indices = amd_calc_l3_indices();
}
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 4ac6d48fe11..bb34b03af25 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+
+obj-$(CONFIG_ACPI_APEI) += mce-apei.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
new file mode 100644
index 00000000000..745b54f9be8
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -0,0 +1,138 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machine, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/cper.h>
+#include <acpi/apei.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+{
+ struct mce m;
+
+ /* Only corrected MC is reported */
+ if (!corrected)
+ return;
+
+ mce_setup(&m);
+ m.bank = 1;
+ /* Fake a memory read corrected error with unknown channel */
+ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+ m.addr = mem_err->physical_addr;
+ mce_log(&m);
+ mce_notify_irq();
+}
+EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
+
+#define CPER_CREATOR_MCE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+/*
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed.
+ */
+struct cper_mce_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ struct mce mce;
+} __packed;
+
+int apei_write_mce(struct mce *m)
+{
+ struct cper_mce_record rcd;
+
+ memset(&rcd, 0, sizeof(rcd));
+ memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd.hdr.revision = CPER_RECORD_REV;
+ rcd.hdr.signature_end = CPER_SIG_END;
+ rcd.hdr.section_count = 1;
+ rcd.hdr.error_severity = CPER_SER_FATAL;
+ /* timestamp, platform_id, partition_id are all invalid */
+ rcd.hdr.validation_bits = 0;
+ rcd.hdr.record_length = sizeof(rcd);
+ rcd.hdr.creator_id = CPER_CREATOR_MCE;
+ rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd.hdr.record_id = cper_next_record_id();
+ rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+ rcd.sec_hdr.section_length = sizeof(rcd.mce);
+ rcd.sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd.sec_hdr.validation_bits = 0;
+ rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+ rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+ memcpy(&rcd.mce, m, sizeof(*m));
+
+ return erst_write(&rcd.hdr);
+}
+
+ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+ struct cper_mce_record rcd;
+ ssize_t len;
+
+ len = erst_read_next(&rcd.hdr, sizeof(rcd));
+ if (len <= 0)
+ return len;
+ /* Can not skip other records in storage via ERST unless clear them */
+ else if (len != sizeof(rcd) ||
+ uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+ if (printk_ratelimit())
+ pr_warning(
+ "MCE-APEI: Can not skip the unknown record in ERST");
+ return -EIO;
+ }
+
+ memcpy(m, &rcd.mce, sizeof(*m));
+ *record_id = rcd.hdr.record_id;
+
+ return sizeof(*m);
+}
+
+/* Check whether there is record in ERST */
+int apei_check_mce(void)
+{
+ return erst_get_record_count();
+}
+
+int apei_clear_mce(u64 record_id)
+{
+ return erst_clear(record_id);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 73734baa50f..e7dbde7bfed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -22,6 +22,7 @@
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab6..fefcc69ee8b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
+#ifdef CONFIG_ACPI_APEI
+int apei_write_mce(struct mce *m);
+ssize_t apei_read_mce(struct mce *m, u64 *record_id);
+int apei_check_mce(void);
+int apei_clear_mce(u64 record_id);
+#else
+static inline int apei_write_mce(struct mce *m)
+{
+ return -EINVAL;
+}
+static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+ return 0;
+}
+static inline int apei_check_mce(void)
+{
+ return 0;
+}
+static inline int apei_clear_mce(u64 record_id)
+{
+ return -EINVAL;
+}
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3ab9c886b61..09535ca9b9d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/types.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/poll.h>
@@ -263,7 +264,7 @@ static void wait_for_panic(void)
static void mce_panic(char *msg, struct mce *final, char *exp)
{
- int i;
+ int i, apei_err = 0;
if (!fake_panic) {
/*
@@ -286,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
- if (!(m->status & MCI_STATUS_UC))
+ if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
+ if (!apei_err)
+ apei_err = apei_write_mce(m);
+ }
}
/* Now print uncorrected but with the final one last */
for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -296,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
continue;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || memcmp(m, final, sizeof(struct mce)))
+ if (!final || memcmp(m, final, sizeof(struct mce))) {
print_mce(m);
+ if (!apei_err)
+ apei_err = apei_write_mce(m);
+ }
}
- if (final)
+ if (final) {
print_mce(final);
+ if (!apei_err)
+ apei_err = apei_write_mce(final);
+ }
if (cpu_missing)
printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
print_mce_tail();
@@ -1492,6 +1502,43 @@ static void collect_tscs(void *data)
rdtscll(cpu_tsc[smp_processor_id()]);
}
+static int mce_apei_read_done;
+
+/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
+static int __mce_read_apei(char __user **ubuf, size_t usize)
+{
+ int rc;
+ u64 record_id;
+ struct mce m;
+
+ if (usize < sizeof(struct mce))
+ return -EINVAL;
+
+ rc = apei_read_mce(&m, &record_id);
+ /* Error or no more MCE record */
+ if (rc <= 0) {
+ mce_apei_read_done = 1;
+ return rc;
+ }
+ rc = -EFAULT;
+ if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
+ return rc;
+ /*
+ * In fact, we should have cleared the record after that has
+ * been flushed to the disk or sent to network in
+ * /sbin/mcelog, but we have no interface to support that now,
+ * so just clear it to avoid duplication.
+ */
+ rc = apei_clear_mce(record_id);
+ if (rc) {
+ mce_apei_read_done = 1;
+ return rc;
+ }
+ *ubuf += sizeof(struct mce);
+
+ return 0;
+}
+
static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
loff_t *off)
{
@@ -1505,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
return -ENOMEM;
mutex_lock(&mce_read_mutex);
+
+ if (!mce_apei_read_done) {
+ err = __mce_read_apei(&buf, usize);
+ if (err || buf != ubuf)
+ goto out;
+ }
+
next = rcu_dereference_check_mce(mcelog.next);
/* Only supports full reads right now */
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
- mutex_unlock(&mce_read_mutex);
- kfree(cpu_tsc);
-
- return -EINVAL;
- }
+ err = -EINVAL;
+ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+ goto out;
err = 0;
prev = 0;
@@ -1561,10 +1612,15 @@ timeout:
memset(&mcelog.entry[i], 0, sizeof(struct mce));
}
}
+
+ if (err)
+ err = -EFAULT;
+
+out:
mutex_unlock(&mce_read_mutex);
kfree(cpu_tsc);
- return err ? -EFAULT : buf - ubuf;
+ return err ? err : buf - ubuf;
}
static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1572,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
poll_wait(file, &mce_wait, wait);
if (rcu_dereference_check_mce(mcelog.next))
return POLLIN | POLLRDNORM;
+ if (!mce_apei_read_done && apei_check_mce())
+ return POLLIN | POLLRDNORM;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index cda932ca3ad..224392d8fe8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -21,6 +21,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index d15df6e49bf..62b48e40920 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -5,6 +5,7 @@
* Author: Andi Kleen
*/
+#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9aa5dc76ff4..fd31a441c61 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -6,7 +6,6 @@
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/io.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index e006e56f699..79289632cb2 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include <linux/init.h>
#define LINE_SIZE 80
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 60398a0d947..db5bdc8addf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -21,6 +21,7 @@
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
+#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
@@ -28,6 +29,7 @@
#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
+#include <asm/compat.h>
static u64 perf_event_mask __read_mostly;
@@ -158,7 +160,7 @@ struct x86_pmu {
struct perf_event *event);
struct event_constraint *event_constraints;
- void (*cpu_prepare)(int cpu);
+ int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
@@ -1333,11 +1335,12 @@ static int __cpuinit
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
+ int ret = NOTIFY_OK;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
if (x86_pmu.cpu_prepare)
- x86_pmu.cpu_prepare(cpu);
+ ret = x86_pmu.cpu_prepare(cpu);
break;
case CPU_STARTING:
@@ -1350,6 +1353,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
x86_pmu.cpu_dying(cpu);
break;
+ case CPU_UP_CANCELED:
case CPU_DEAD:
if (x86_pmu.cpu_dead)
x86_pmu.cpu_dead(cpu);
@@ -1359,7 +1363,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
}
- return NOTIFY_OK;
+ return ret;
}
static void __init pmu_check_apic(void)
@@ -1628,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return len;
}
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+#ifdef CONFIG_COMPAT
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
- unsigned long bytes;
+ /* 32-bit process in 64-bit kernel. */
+ struct stack_frame_ia32 frame;
+ const void __user *fp;
- bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+ if (!test_thread_flag(TIF_IA32))
+ return 0;
+
+ fp = compat_ptr(regs->bp);
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ unsigned long bytes;
+ frame.next_frame = 0;
+ frame.return_address = 0;
+
+ bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (bytes != sizeof(frame))
+ break;
+
+ if (fp < compat_ptr(regs->sp))
+ break;
- return bytes == sizeof(*frame);
+ callchain_store(entry, frame.return_address);
+ fp = compat_ptr(frame.next_frame);
+ }
+ return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+ return 0;
}
+#endif
static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1651,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip);
+ if (perf_callchain_user32(regs, entry))
+ return;
+
while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
- if (!copy_stack_frame(fp, &frame))
+ bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (bytes != sizeof(frame))
break;
if ((unsigned long)fp < regs->sp)
@@ -1702,7 +1739,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
-#ifdef CONFIG_EVENT_TRACING
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
regs->ip = ip;
@@ -1714,4 +1750,3 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
regs->cs = __KERNEL_CS;
local_save_flags(regs->flags);
}
-#endif
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index b87e0b6970c..db6f7d4056e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+ struct amd_nb *nb = cpuc->amd_nb;
+
+ return nb && nb->nb_id != -1;
+}
+
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
@@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
/*
* only care about NB events
*/
- if (!(nb && amd_is_nb_event(hwc)))
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
return;
/*
@@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
/*
* if not NB event or no NB, then no constraints
*/
- if (!(nb && amd_is_nb_event(hwc)))
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
return &unconstrained;
/*
@@ -293,51 +300,55 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
return nb;
}
-static void amd_pmu_cpu_online(int cpu)
+static int amd_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ WARN_ON_ONCE(cpuc->amd_nb);
+
+ if (boot_cpu_data.x86_max_cores < 2)
+ return NOTIFY_OK;
+
+ cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+ if (!cpuc->amd_nb)
+ return NOTIFY_BAD;
+
+ return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
{
- struct cpu_hw_events *cpu1, *cpu2;
- struct amd_nb *nb = NULL;
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct amd_nb *nb;
int i, nb_id;
if (boot_cpu_data.x86_max_cores < 2)
return;
- /*
- * function may be called too early in the
- * boot process, in which case nb_id is bogus
- */
nb_id = amd_get_nb_id(cpu);
- if (nb_id == BAD_APICID)
- return;
-
- cpu1 = &per_cpu(cpu_hw_events, cpu);
- cpu1->amd_nb = NULL;
+ WARN_ON_ONCE(nb_id == BAD_APICID);
raw_spin_lock(&amd_nb_lock);
for_each_online_cpu(i) {
- cpu2 = &per_cpu(cpu_hw_events, i);
- nb = cpu2->amd_nb;
- if (!nb)
+ nb = per_cpu(cpu_hw_events, i).amd_nb;
+ if (WARN_ON_ONCE(!nb))
continue;
- if (nb->nb_id == nb_id)
- goto found;
- }
- nb = amd_alloc_nb(cpu, nb_id);
- if (!nb) {
- pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
- raw_spin_unlock(&amd_nb_lock);
- return;
+ if (nb->nb_id == nb_id) {
+ kfree(cpuc->amd_nb);
+ cpuc->amd_nb = nb;
+ break;
+ }
}
-found:
- nb->refcnt++;
- cpu1->amd_nb = nb;
+
+ cpuc->amd_nb->nb_id = nb_id;
+ cpuc->amd_nb->refcnt++;
raw_spin_unlock(&amd_nb_lock);
}
-static void amd_pmu_cpu_offline(int cpu)
+static void amd_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuhw;
@@ -349,8 +360,10 @@ static void amd_pmu_cpu_offline(int cpu)
raw_spin_lock(&amd_nb_lock);
if (cpuhw->amd_nb) {
- if (--cpuhw->amd_nb->refcnt == 0)
- kfree(cpuhw->amd_nb);
+ struct amd_nb *nb = cpuhw->amd_nb;
+
+ if (nb->nb_id == -1 || --nb->refcnt == 0)
+ kfree(nb);
cpuhw->amd_nb = NULL;
}
@@ -379,8 +392,9 @@ static __initconst struct x86_pmu amd_pmu = {
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,
- .cpu_prepare = amd_pmu_cpu_online,
- .cpu_dead = amd_pmu_cpu_offline,
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
};
static __init int amd_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 84bfde64a33..9c794ac8783 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -936,6 +936,7 @@ static __init int intel_pmu_init(void)
case 26: /* 45 nm nehalem, "Bloomfield" */
case 30: /* 45 nm nehalem, "Lynnfield" */
+ case 46: /* 45 nm nehalem-ex, "Beckton" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59c..dfdb4dba232 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
*/
#include <linux/dmi.h>
+#include <linux/module.h>
#include <asm/div64.h>
#include <asm/vmware.h>
#include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
return 0;
}
+EXPORT_SYMBOL(vmware_platform);
/*
* VMware hypervisor takes care of exporting a reliable TSC to the guest.
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 83e5e628de7..8b862d5900f 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/uaccess.h>
+#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a4849c10a77..ebd4c51d096 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,7 +27,6 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
-#include <asm/x86_init.h>
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
-
-#ifdef CONFIG_X86_64
- x86_platform.iommu_shutdown();
-#endif
-
crash_save_cpu(regs, safe_smp_processor_id());
}
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index cd97ce18c29..67414550c3c 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -5,6 +5,7 @@
* Copyright (C) IBM Corporation, 2004. All rights reserved
*/
+#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/crash_dump.h>
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 29e5f7c845b..e1a93be4fd4 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,6 +14,8 @@
#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
#endif
+#include <linux/uaccess.h>
+
extern void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl);
@@ -30,6 +32,11 @@ struct stack_frame {
unsigned long return_address;
};
+struct stack_frame_ia32 {
+ u32 next_frame;
+ u32 return_address;
+};
+
static inline unsigned long rewind_frame_pointer(int n)
{
struct stack_frame *frame;
@@ -37,8 +44,10 @@ static inline unsigned long rewind_frame_pointer(int n)
get_bp(frame);
#ifdef CONFIG_FRAME_POINTER
- while (n--)
- frame = frame->next_frame;
+ while (n--) {
+ if (probe_kernel_address(&frame->next_frame, frame))
+ break;
+ }
#endif
return (unsigned long)frame;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440fbd7..7bca3c6a02f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -519,29 +519,45 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
(unsigned long long) start,
(unsigned long long) end);
- e820_print_type(old_type);
+ if (checktype)
+ e820_print_type(old_type);
printk(KERN_CONT "\n");
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
u64 final_start, final_end;
+ u64 ei_end;
if (checktype && ei->type != old_type)
continue;
+
+ ei_end = ei->addr + ei->size;
/* totally covered? */
- if (ei->addr >= start &&
- (ei->addr + ei->size) <= (start + size)) {
+ if (ei->addr >= start && ei_end <= end) {
real_removed_size += ei->size;
memset(ei, 0, sizeof(struct e820entry));
continue;
}
+
+ /* new range is totally covered? */
+ if (ei->addr < start && ei_end > end) {
+ e820_add_region(end, ei_end - end, ei->type);
+ ei->size = start - ei->addr;
+ real_removed_size += size;
+ continue;
+ }
+
/* partially covered */
final_start = max(start, ei->addr);
- final_end = min(start + size, ei->addr + ei->size);
+ final_end = min(end, ei_end);
if (final_start >= final_end)
continue;
real_removed_size += final_end - final_start;
+ /*
+ * left range could be head or tail, so need to update
+ * size at first.
+ */
ei->size -= final_end - final_start;
if (ei->addr < final_start)
continue;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ee4fa1bfcb3..23b4ecdffa9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -4,6 +4,7 @@
#include <linux/sysdev.h>
#include <linux/delay.h>
#include <linux/errno.h>
+#include <linux/slab.h>
#include <linux/hpet.h>
#include <linux/init.h>
#include <linux/cpu.h>
@@ -399,9 +400,15 @@ static int hpet_next_event(unsigned long delta,
* then we might have a real hardware problem. We can not do
* much about it here, but at least alert the user/admin with
* a prominent warning.
+ * An erratum on some chipsets (ICH9,..), results in comparator read
+ * immediately following a write returning old value. Workaround
+ * for this is to read this value second time, when first
+ * read returns old value.
*/
- WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
+ if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+ WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
KERN_WARNING "hpet: compare register read back failed.\n");
+ }
return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
@@ -1143,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq)
do_div(clc, freq);
clc >>= hpet_clockevent.shift;
hpet_pie_delta = clc;
+ hpet_pie_limit = 0;
}
return 1;
}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index c01a2b846d4..54c31c28548 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <asm/sigcontext.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index fb725ee15f5..7c9f02c130f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -5,7 +5,6 @@
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
-#include <linux/slab.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/kernel_stat.h>
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f01d390f9c5..0ed2d300cd4 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -5,7 +5,6 @@
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
-#include <linux/slab.h>
#include <linux/random.h>
#include <linux/kprobes.h>
#include <linux/init.h>
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index 9b895464dd0..0f7bc20cfcd 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -2,8 +2,8 @@
* Shared support code for AMD K8 northbridges and derivates.
* Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
*/
-#include <linux/gfp.h>
#include <linux/types.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index e444357375c..8afd9f321f1 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -9,6 +9,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/io.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index bfba6019d76..b2258ca9100 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -618,8 +618,8 @@ int kgdb_arch_init(void)
* portion of kgdb because this operation requires mutexs to
* complete.
*/
+ hw_breakpoint_init(&attr);
attr.bp_addr = (unsigned long)kgdb_arch_init;
- attr.type = PERF_TYPE_BREAKPOINT;
attr.bp_len = HW_BREAKPOINT_LEN_1;
attr.bp_type = HW_BREAKPOINT_W;
attr.disabled = 1;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b43bbaebe2c..1658efdfb4e 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -534,20 +534,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
struct kprobe_ctlblk *kcb;
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->ip = (unsigned long)addr;
- return 1;
- }
-
/*
* We don't want to be preempted for the entire
* duration of kprobe processing. We conditionally
@@ -579,6 +565,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
setup_singlestep(p, regs, kcb, 0);
return 1;
}
+ } else if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Back up over the (now missing) int3 and run
+ * the original instruction.
+ */
+ regs->ip = (unsigned long)addr;
+ preempt_enable_no_resched();
+ return 1;
} else if (kprobe_running()) {
p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ec6ef60cbd1..ea697263b37 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -7,6 +7,7 @@
*/
#include <linux/errno.h>
+#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4a8bb82248a..035c8c52918 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
+#include <linux/gfp.h>
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 845d80ce1ef..63eaf659623 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -42,6 +42,7 @@
#include <linux/kernel.h>
#include <linux/mca.h>
#include <linux/kprobes.h>
+#include <linux/slab.h>
#include <asm/system.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 89f386f044e..e0bc186d750 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -23,6 +23,7 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/mm.h>
+#include <linux/gfp.h>
#include <asm/system.h>
#include <asm/page.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a2c1edd2d3a..e81030f71a8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -664,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
- reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+ reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
}
static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -693,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
+ reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
if (mpf->physptr)
smp_reserve_memory(mpf);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 206735ac8cb..4d4468e9f47 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -37,6 +37,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/uaccess.h>
+#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a4ac764a688..4b7e3d8b01d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -2,6 +2,7 @@
#include <linux/dma-debug.h>
#include <linux/dmar.h>
#include <linux/bootmem.h>
+#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/kmemleak.h>
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index f3af115a573..0f7f130caa6 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -29,6 +29,7 @@
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
#include <linux/io.h>
+#include <linux/gfp.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
@@ -564,6 +565,9 @@ static void enable_gart_translations(void)
enable_gart_translation(dev, __pa(agp_gatt_table));
}
+
+ /* Flush the GART-TLB to remove stale entries */
+ k8_flush_garts();
}
/*
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 22be12b60a8..3af4af810c0 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -4,6 +4,7 @@
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/init.h>
+#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 28ad9f4d8b9..0415c3ef91b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
* check OSVW bit for CPUs that are not affected
* by erratum #400
*/
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
- if (val >= 2) {
- rdmsrl(MSR_AMD64_OSVW_STATUS, val);
- if (!(val & BIT(1)))
- goto no_c1e_idle;
+ if (cpu_has(c, X86_FEATURE_OSVW)) {
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
+ if (val >= 2) {
+ rdmsrl(MSR_AMD64_OSVW_STATUS, val);
+ if (!(val & BIT(1)))
+ goto no_c1e_idle;
+ }
}
return 1;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4c..17cb3295cbf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -276,12 +276,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
set_tsk_thread_flag(p, TIF_FORK);
- p->thread.fs = me->thread.fs;
- p->thread.gs = me->thread.gs;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
+ p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
savesegment(fs, p->thread.fsindex);
+ p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a503b1fd04e..2e9b55027b7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
+#include <linux/slab.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
#include <linux/tracehook.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d76e18570c6..c4851eff57b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -55,7 +55,6 @@
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
-#include <linux/slab.h>
#include <linux/user.h>
#include <linux/delay.h>
@@ -608,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
+static __init void reserve_ibft_region(void)
+{
+ unsigned long addr, size = 0;
+
+ addr = find_ibft_region(&size);
+
+ if (size)
+ reserve_early_overlap_ok(addr, addr + size, "ibft");
+}
+
#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
@@ -910,6 +919,8 @@ void __init setup_arch(char **cmdline_p)
*/
find_smp_config();
+ reserve_ibft_region();
+
reserve_trampoline_memory();
#ifdef CONFIG_ACPI_SLEEP
@@ -977,8 +988,6 @@ void __init setup_arch(char **cmdline_p)
dma32_reserve_bootmem();
- reserve_ibft_region();
-
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index ec1de97600e..d801210945d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -21,6 +21,7 @@
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
+#include <linux/gfp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 06d98ae5a80..763d815e27a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -49,6 +49,7 @@
#include <linux/nmi.h>
#include <linux/tboot.h>
#include <linux/stackprotector.h>
+#include <linux/gfp.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -242,8 +243,6 @@ static void __cpuinit smp_callin(void)
end_local_APIC_setup();
map_cpu_to_logical_apicid();
- notify_cpu_starting(cpuid);
-
/*
* Need to setup vector mappings before we enable interrupts.
*/
@@ -264,6 +263,8 @@ static void __cpuinit smp_callin(void)
*/
smp_store_cpu_info(cpuid);
+ notify_cpu_starting(cpuid);
+
/*
* Allow the master to continue.
*/
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 364d015efeb..17b03dd3a6b 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -9,6 +9,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index ece73d8e324..1d40336b030 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/rbtree.h>
+#include <linux/slab.h>
#include <linux/irq.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 2b75ef638db..56e421bc379 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -19,6 +19,7 @@
* Copyright (c) Dimitri Sivanich
*/
#include <linux/clockchips.h>
+#include <linux/slab.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 7dd599deca4..ce9fbacb752 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -28,6 +28,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
+#include <linux/gfp.h>
#include <asm/vmi.h>
#include <asm/io.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 294698b6daf..0150affad25 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -32,6 +32,7 @@
#define pr_fmt(fmt) "pit: " fmt
#include <linux/kvm_host.h>
+#include <linux/slab.h>
#include "irq.h"
#include "i8254.h"
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 07771da85de..a790fa128a9 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,6 +26,7 @@
* Port from Qemu.
*/
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/bitops.h>
#include "irq.h"
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4b224f90087..1eb7a4ae0c9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -26,6 +26,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 741373e8ca7..19a8906bcaa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -31,6 +31,7 @@
#include <linux/hugetlb.h>
#include <linux/compiler.h>
#include <linux/srcu.h>
+#include <linux/slab.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
@@ -1489,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
for_each_sp(pages, sp, parents, i) {
kvm_mmu_zap_page(kvm, sp);
mmu_pages_clear_parents(&parents);
+ zapped++;
}
- zapped += pages.nr;
kvm_mmu_pages_init(parent, &parents, &pages);
}
@@ -1541,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
*/
if (used_pages > kvm_nr_mmu_pages) {
- while (used_pages > kvm_nr_mmu_pages) {
+ while (used_pages > kvm_nr_mmu_pages &&
+ !list_empty(&kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *page;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_zap_page(kvm, page);
+ used_pages -= kvm_mmu_zap_page(kvm, page);
used_pages--;
}
+ kvm_nr_mmu_pages = used_pages;
kvm->arch.n_free_mmu_pages = 0;
}
else
@@ -1595,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
&& !sp->role.invalid) {
pgprintk("%s: zap %lx %x\n",
__func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ if (kvm_mmu_zap_page(kvm, sp))
+ nn = bucket->first;
}
}
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 52f78dd0301..737361fcd50 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -26,6 +26,7 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/ftrace_event.h>
+#include <linux/slab.h>
#include <asm/desc.h>
@@ -705,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_svm;
+ err = -ENOMEM;
page = alloc_page(GFP_KERNEL);
- if (!page) {
- err = -ENOMEM;
+ if (!page)
goto uninit;
- }
- err = -ENOMEM;
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
- goto uninit;
+ goto free_page1;
nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!nested_msrpm_pages)
- goto uninit;
-
- svm->msrpm = page_address(msrpm_pages);
- svm_vcpu_init_msrpm(svm->msrpm);
+ goto free_page2;
hsave_page = alloc_page(GFP_KERNEL);
if (!hsave_page)
- goto uninit;
+ goto free_page3;
+
svm->nested.hsave = page_address(hsave_page);
+ svm->msrpm = page_address(msrpm_pages);
+ svm_vcpu_init_msrpm(svm->msrpm);
+
svm->nested.msrpm = page_address(nested_msrpm_pages);
svm->vmcb = page_address(page);
@@ -743,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
return &svm->vcpu;
+free_page3:
+ __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page2:
+ __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page1:
+ __free_page(page);
uninit:
kvm_vcpu_uninit(&svm->vcpu);
free_svm:
@@ -2061,7 +2067,7 @@ static int cpuid_interception(struct vcpu_svm *svm)
static int iret_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.nmi_window_exits;
- svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
return 1;
}
@@ -2473,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
- svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
}
@@ -2533,10 +2539,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
if (masked) {
svm->vcpu.arch.hflags |= HF_NMI_MASK;
- svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
} else {
svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
- svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
}
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14873b9f843..2f8db0ec8ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/ftrace_event.h>
+#include <linux/slab.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -76,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
+#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -130,7 +133,7 @@ struct vcpu_vmx {
} host_state;
struct {
int vm86_active;
- u8 save_iopl;
+ ulong save_rflags;
struct kvm_save_segment {
u16 selector;
unsigned long base;
@@ -817,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
- unsigned long rflags;
+ unsigned long rflags, save_rflags;
rflags = vmcs_readl(GUEST_RFLAGS);
- if (to_vmx(vcpu)->rmode.vm86_active)
- rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+ if (to_vmx(vcpu)->rmode.vm86_active) {
+ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+ save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ }
return rflags;
}
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
- if (to_vmx(vcpu)->rmode.vm86_active)
+ if (to_vmx(vcpu)->rmode.vm86_active) {
+ to_vmx(vcpu)->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
+ }
vmcs_writel(GUEST_RFLAGS, rflags);
}
@@ -1482,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
flags = vmcs_readl(GUEST_RFLAGS);
- flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
- flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
+ flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+ flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1556,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
flags = vmcs_readl(GUEST_RFLAGS);
- vmx->rmode.save_iopl
- = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ vmx->rmode.save_rflags = flags;
flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -2696,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
return 0;
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_NMI));
+ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
}
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e46282a5656..c4f35b545c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
+#include <linux/slab.h>
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
@@ -432,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
#ifdef CONFIG_X86_64
if (cr0 & 0xffffffff00000000UL) {
- printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
- cr0, kvm_read_cr0(vcpu));
kvm_inject_gp(vcpu, 0);
return;
}
@@ -442,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
cr0 &= ~CR0_RESERVED_BITS;
if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
- printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
kvm_inject_gp(vcpu, 0);
return;
}
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
- printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
- "and a clear PE flag\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -460,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
int cs_db, cs_l;
if (!is_pae(vcpu)) {
- printk(KERN_DEBUG "set_cr0: #GP, start paging "
- "in long mode while PAE is disabled\n");
kvm_inject_gp(vcpu, 0);
return;
}
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
if (cs_l) {
- printk(KERN_DEBUG "set_cr0: #GP, start paging "
- "in long mode while CS.L == 1\n");
kvm_inject_gp(vcpu, 0);
return;
@@ -476,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
} else
#endif
if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
- "reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -504,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
if (cr4 & CR4_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE)) {
- printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
- "in long mode\n");
kvm_inject_gp(vcpu, 0);
return;
}
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (cr4 & X86_CR4_VMXE) {
- printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -546,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (is_long_mode(vcpu)) {
if (cr3 & CR3_L_MODE_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
} else {
if (is_pae(vcpu)) {
if (cr3 & CR3_PAE_RESERVED_BITS) {
- printk(KERN_DEBUG
- "set_cr3: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
- printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
- "reserved bits\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -592,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (cr8 & CR8_RESERVED_BITS) {
- printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
kvm_inject_gp(vcpu, 0);
return;
}
@@ -648,15 +627,12 @@ static u32 emulated_msrs[] = {
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits) {
- printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
- efer);
kvm_inject_gp(vcpu, 0);
return;
}
if (is_paging(vcpu)
&& (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
- printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -666,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
- printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -677,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
- printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
kvm_inject_gp(vcpu, 0);
return;
}
@@ -966,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
u32 offset = msr - MSR_IA32_MC0_CTL;
- /* only 0 or all 1s can be written to IA32_MCi_CTL */
+ /* only 0 or all 1s can be written to IA32_MCi_CTL
+ * some Linux kernels though clear bit 10 in bank 4 to
+ * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+ * this to avoid an uncatched #GP in the guest
+ */
if ((offset & 0x3) == 0 &&
- data != 0 && data != ~(u64)0)
+ data != 0 && (data | (1 << 10)) != ~(u64)0)
return -1;
vcpu->arch.mce_banks[offset] = data;
break;
@@ -1734,6 +1712,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
if (copy_from_user(cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry)))
goto out_free;
+ vcpu_load(vcpu);
for (i = 0; i < cpuid->nent; i++) {
vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -1751,6 +1730,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
+ vcpu_put(vcpu);
out_free:
vfree(cpuid_entries);
@@ -1771,9 +1751,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
+ vcpu_load(vcpu);
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
+ vcpu_put(vcpu);
return 0;
out:
@@ -2634,8 +2616,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- int r, n, i;
+ int r, i;
struct kvm_memory_slot *memslot;
+ unsigned long n;
unsigned long is_dirty = 0;
unsigned long *dirty_bitmap = NULL;
@@ -2650,7 +2633,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (!memslot->dirty_bitmap)
goto out;
- n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ n = kvm_dirty_bitmap_bytes(memslot);
r = -ENOMEM;
dirty_bitmap = vmalloc(n);
@@ -4482,7 +4465,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.cur_count) {
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = complete_pio(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r)
goto out;
}
@@ -5145,6 +5130,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
int ret = 0;
u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
+ u32 desc_limit;
old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
@@ -5167,7 +5153,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
}
}
- if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
+ desc_limit = get_desc_limit(&nseg_desc);
+ if (!nseg_desc.p ||
+ ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
+ desc_limit < 0x2b)) {
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
return 1;
}
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7e59dc1d3fc..2bdf628066b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
local_irq_save(flags);
if (lguest_data.hcall_status[next_call] != 0xFF) {
/* Table full, so do normal hcall which will flush table. */
- kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+ hcall(call, arg1, arg2, arg3, arg4);
} else {
lguest_data.hcalls[next_call].arg0 = call;
lguest_data.hcalls[next_call].arg1 = arg1;
@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,
* So, when we're in lazy mode, we call async_hcall() to store the call for
* future processing:
*/
-static void lazy_hcall1(unsigned long call,
- unsigned long arg1)
+static void lazy_hcall1(unsigned long call, unsigned long arg1)
{
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- kvm_hypercall1(call, arg1);
+ hcall(call, arg1, 0, 0, 0);
else
async_hcall(call, arg1, 0, 0, 0);
}
/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
static void lazy_hcall2(unsigned long call,
- unsigned long arg1,
- unsigned long arg2)
+ unsigned long arg1,
+ unsigned long arg2)
{
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- kvm_hypercall2(call, arg1, arg2);
+ hcall(call, arg1, arg2, 0, 0);
else
async_hcall(call, arg1, arg2, 0, 0);
}
static void lazy_hcall3(unsigned long call,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3)
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3)
{
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- kvm_hypercall3(call, arg1, arg2, arg3);
+ hcall(call, arg1, arg2, arg3, 0);
else
async_hcall(call, arg1, arg2, arg3, 0);
}
#ifdef CONFIG_X86_PAE
static void lazy_hcall4(unsigned long call,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4)
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3,
+ unsigned long arg4)
{
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+ hcall(call, arg1, arg2, arg3, arg4);
else
async_hcall(call, arg1, arg2, arg3, arg4);
}
@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,
:*/
static void lguest_leave_lazy_mmu_mode(void)
{
- kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+ hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
paravirt_leave_lazy_mmu();
}
static void lguest_end_context_switch(struct task_struct *next)
{
- kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+ hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
paravirt_end_context_switch(next);
}
@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
/* Keep the local copy up to date. */
native_write_idt_entry(dt, entrynum, g);
/* Tell Host about this new entry. */
- kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
+ hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
}
/*
@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
struct desc_struct *idt = (void *)desc->address;
for (i = 0; i < (desc->size+1)/8; i++)
- kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
+ hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
}
/*
@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
struct desc_struct *gdt = (void *)desc->address;
for (i = 0; i < (desc->size+1)/8; i++)
- kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b);
+ hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
}
/*
@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
{
native_write_gdt_entry(dt, entrynum, desc, type);
/* Tell Host about this new entry. */
- kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum,
- dt[entrynum].a, dt[entrynum].b);
+ hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
+ dt[entrynum].a, dt[entrynum].b, 0);
}
/*
@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
}
/* Please wake us this far in the future. */
- kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
+ hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
return 0;
}
@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
/* A 0 argument shuts the clock down. */
- kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
+ hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
break;
case CLOCK_EVT_MODE_ONESHOT:
/* This is what we expect. */
@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)
/* STOP! Until an interrupt comes in. */
static void lguest_safe_halt(void)
{
- kvm_hypercall0(LHCALL_HALT);
+ hcall(LHCALL_HALT, 0, 0, 0, 0);
}
/*
@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)
*/
static void lguest_power_off(void)
{
- kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
- LGUEST_SHUTDOWN_POWEROFF);
+ hcall(LHCALL_SHUTDOWN, __pa("Power down"),
+ LGUEST_SHUTDOWN_POWEROFF, 0, 0);
}
/*
@@ -1123,7 +1122,7 @@ static void lguest_power_off(void)
*/
static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
{
- kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
+ hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
/* The hcall won't return, but to keep gcc happy, we're "done". */
return NOTIFY_DONE;
}
@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
len = sizeof(scratch) - 1;
scratch[len] = '\0';
memcpy(scratch, buf, len);
- kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
+ hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
/* This routine returns the number of bytes actually written. */
return len;
@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
*/
static void lguest_restart(char *reason)
{
- kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
+ hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
}
/*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 27eac0faee4..4f420c2f2d5 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -32,7 +32,7 @@ ENTRY(lguest_entry)
*/
movl $LHCALL_LGUEST_INIT, %eax
movl $lguest_data - __PAGE_OFFSET, %ebx
- .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+ int $LGUEST_TRAP_ENTRY
/* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 15acecf0d7a..41fcf00e49d 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -60,7 +60,7 @@ ENTRY(call_rwsem_down_write_failed)
ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
- decw %dx /* do nothing if still outstanding active readers */
+ decl %edx /* do nothing if still outstanding active readers */
jnz 1f
save_common_regs
movq %rax,%rdi
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f46c340727b..069ce7c37c0 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
-#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 452ee5b8f30..b278535b14a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
+#include <linux/gfp.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 5cb3f0f54f4..bca79091b9d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -25,11 +25,11 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
-#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <linux/gfp.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e9b040e1cde..ee41bba315d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>
+#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5eb1ba74a3a..12e4d2d3c11 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -448,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
+void __init fixup_early_ioremap(void)
+{
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+ if (prev_map[i]) {
+ WARN_ON(1);
+ break;
+ }
+ }
+
+ early_ioremap_init();
+}
+
static int __init check_early_ioremap_leak(void)
{
int count = 0;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 536fb682336..5d0e67fff1a 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -21,6 +21,7 @@
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
+#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 34a3291ca10..3adff7dcc14 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -26,6 +26,7 @@
#include <linux/module.h>
#include <linux/debugfs.h>
+#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/version.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index cf07c26d9a4..28195c350b9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -6,13 +6,13 @@
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
+#include <linux/gfp.h>
#include <asm/e820.h>
#include <asm/processor.h>
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index ae9648eb1c7..edc8b95afc1 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,7 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/gfp.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index c9ba9deafe8..5c4ee422590 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,4 +1,5 @@
#include <linux/mm.h>
+#include <linux/gfp.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 46c8834aedc..792854003ed 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -6,7 +6,6 @@
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
-#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
@@ -19,6 +18,7 @@
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/io.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -129,6 +129,7 @@ static int __init parse_reservetop(char *arg)
address = memparse(arg, &arg);
reserve_top_address(address);
+ fixup_early_ioremap();
return 0;
}
early_param("reservetop", parse_reservetop);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 28c68762648..38512d0c474 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -461,7 +461,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
* node, it must now point to the fake node ID.
*/
for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid)
+ if (apicid_to_node[j] == nid &&
+ fake_apicid_to_node[j] == NUMA_NO_NODE)
fake_apicid_to_node[j] = i;
}
for (i = 0; i < num_nodes; i++)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 0b7882dbe78..9dcf43d7d0c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/dmi.h>
+#include <linux/slab.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>
@@ -65,14 +66,44 @@ resource_to_addr(struct acpi_resource *resource,
struct acpi_resource_address64 *addr)
{
acpi_status status;
-
- status = acpi_resource_to_address64(resource, addr);
- if (ACPI_SUCCESS(status) &&
- (addr->resource_type == ACPI_MEMORY_RANGE ||
- addr->resource_type == ACPI_IO_RANGE) &&
- addr->address_length > 0 &&
- addr->producer_consumer == ACPI_PRODUCER) {
+ struct acpi_resource_memory24 *memory24;
+ struct acpi_resource_memory32 *memory32;
+ struct acpi_resource_fixed_memory32 *fixed_memory32;
+
+ memset(addr, 0, sizeof(*addr));
+ switch (resource->type) {
+ case ACPI_RESOURCE_TYPE_MEMORY24:
+ memory24 = &resource->data.memory24;
+ addr->resource_type = ACPI_MEMORY_RANGE;
+ addr->minimum = memory24->minimum;
+ addr->address_length = memory24->address_length;
+ addr->maximum = addr->minimum + addr->address_length - 1;
+ return AE_OK;
+ case ACPI_RESOURCE_TYPE_MEMORY32:
+ memory32 = &resource->data.memory32;
+ addr->resource_type = ACPI_MEMORY_RANGE;
+ addr->minimum = memory32->minimum;
+ addr->address_length = memory32->address_length;
+ addr->maximum = addr->minimum + addr->address_length - 1;
return AE_OK;
+ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
+ fixed_memory32 = &resource->data.fixed_memory32;
+ addr->resource_type = ACPI_MEMORY_RANGE;
+ addr->minimum = fixed_memory32->address;
+ addr->address_length = fixed_memory32->address_length;
+ addr->maximum = addr->minimum + addr->address_length - 1;
+ return AE_OK;
+ case ACPI_RESOURCE_TYPE_ADDRESS16:
+ case ACPI_RESOURCE_TYPE_ADDRESS32:
+ case ACPI_RESOURCE_TYPE_ADDRESS64:
+ status = acpi_resource_to_address64(resource, addr);
+ if (ACPI_SUCCESS(status) &&
+ (addr->resource_type == ACPI_MEMORY_RANGE ||
+ addr->resource_type == ACPI_IO_RANGE) &&
+ addr->address_length > 0) {
+ return AE_OK;
+ }
+ break;
}
return AE_ERROR;
}
@@ -90,30 +121,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
}
-static void
-align_resource(struct acpi_device *bridge, struct resource *res)
-{
- int align = (res->flags & IORESOURCE_MEM) ? 16 : 4;
-
- /*
- * Host bridge windows are not BARs, but the decoders on the PCI side
- * that claim this address space have starting alignment and length
- * constraints, so fix any obvious BIOS goofs.
- */
- if (!IS_ALIGNED(res->start, align)) {
- dev_printk(KERN_DEBUG, &bridge->dev,
- "host bridge window %pR invalid; "
- "aligning start to %d-byte boundary\n", res, align);
- res->start &= ~(align - 1);
- }
- if (!IS_ALIGNED(res->end + 1, align)) {
- dev_printk(KERN_DEBUG, &bridge->dev,
- "host bridge window %pR invalid; "
- "aligning end to %d-byte boundary\n", res, align);
- res->end = ALIGN(res->end, align) - 1;
- }
-}
-
static acpi_status
setup_resource(struct acpi_resource *acpi_res, void *data)
{
@@ -123,7 +130,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
acpi_status status;
unsigned long flags;
struct resource *root, *conflict;
- u64 start, end, max_len;
+ u64 start, end;
status = resource_to_addr(acpi_res, &addr);
if (!ACPI_SUCCESS(status))
@@ -140,19 +147,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
} else
return AE_OK;
- max_len = addr.maximum - addr.minimum + 1;
- if (addr.address_length > max_len) {
- dev_printk(KERN_DEBUG, &info->bridge->dev,
- "host bridge window length %#llx doesn't fit in "
- "%#llx-%#llx, trimming\n",
- (unsigned long long) addr.address_length,
- (unsigned long long) addr.minimum,
- (unsigned long long) addr.maximum);
- addr.address_length = max_len;
- }
-
start = addr.minimum + addr.translation_offset;
- end = start + addr.address_length - 1;
+ end = addr.maximum + addr.translation_offset;
res = &info->res[info->res_num];
res->name = info->name;
@@ -160,7 +156,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
res->start = start;
res->end = end;
res->child = NULL;
- align_resource(info->bridge, res);
if (!pci_use_crs) {
dev_printk(KERN_DEBUG, &info->bridge->dev,
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 294e10cb11e..cf2e93869c4 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -9,6 +9,7 @@
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/dmi.h>
+#include <linux/slab.h>
#include <asm/acpi.h>
#include <asm/segment.h>
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 46fd43f7910..97da2ba9344 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,6 +72,9 @@ pcibios_align_resource(void *data, const struct resource *res,
return start;
if (start & 0x300)
start = (start + 0x3ff) & ~0x3ff;
+ } else if (res->flags & IORESOURCE_MEM) {
+ if (start < BIOS_END)
+ start = BIOS_END;
}
return start;
}
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 8b107521d24..5d362b5ba06 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/dmi.h>
#include <linux/io.h>
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8f3f9a50b1e..39b9ebe8f88 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -16,6 +16,7 @@
#include <linux/sfi_acpi.h>
#include <linux/bitmap.h>
#include <linux/dmi.h>
+#include <linux/slab.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
#include <asm/acpi.h>
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 8bf2fcb88d0..1cdc02cf8fa 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
u32 size;
int i;
+ /* Must have extended configuration space */
+ if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
+ return;
+
/* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
offset = fixed_bar_cap(dev->bus, dev->devfn);
if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 1c975cc9839..59a225c17b8 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -4,6 +4,7 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/pci_x86.h>
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 81197c62d5b..3769079874d 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -6,6 +6,7 @@
* Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*/
+#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/bootmem.h>
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 65fdc86e923..d24f983ba1e 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -8,6 +8,7 @@
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
+#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/suspend.h>
#include <asm/proto.h>
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index b641388d828..ad47daeafa4 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -27,10 +27,17 @@ ENTRY(swsusp_arch_suspend)
ret
ENTRY(restore_image)
+ movl mmu_cr4_features, %ecx
movl resume_pg_dir, %eax
subl $__PAGE_OFFSET, %eax
movl %eax, %cr3
+ jecxz 1f # cr4 Pentium and higher, skip if zero
+ andl $~(X86_CR4_PGE), %ecx
+ movl %ecx, %cr4; # turn off PGE
+ movl %cr3, %eax; # flush TLB
+ movl %eax, %cr3
+1:
movl restore_pblist, %edx
.p2align 4,,7
@@ -54,16 +61,8 @@ done:
movl $swapper_pg_dir, %eax
subl $__PAGE_OFFSET, %eax
movl %eax, %cr3
- /* Flush TLB, including "global" things (vmalloc) */
movl mmu_cr4_features, %ecx
jecxz 1f # cr4 Pentium and higher, skip if zero
- movl %ecx, %edx
- andl $~(X86_CR4_PGE), %edx
- movl %edx, %cr4; # turn off PGE
-1:
- movl %cr3, %eax; # flush TLB
- movl %eax, %cr3
- jecxz 1f # cr4 Pentium and higher, skip if zero
movl %ecx, %cr4; # turn PGE back on
1:
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 21e1aeb9f3e..ac74869b814 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index e133ce25e29..1304bcec8ee 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -1,5 +1,6 @@
#include <linux/init.h>
#include <linux/debugfs.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include "debugfs.h"
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b607239c1ba..65d8d79b46a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
#include <linux/highmem.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/gfp.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f9eb7de74f4..914f04695ce 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -43,6 +43,7 @@
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/module.h>
+#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index deafb65ef44..a29693fd313 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -14,6 +14,7 @@
*/
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/slab.h>
#include <linux/smp.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 24ded31b5ae..e0500646585 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -6,6 +6,7 @@
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
+#include <linux/gfp.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0d3f07cd1b5..32764b8880b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
+#include <linux/gfp.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>