From 7752d5cfe3d11ca0bb9c673ec38bd78ba6578f8e Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Fri, 15 Feb 2008 01:27:20 -0800 Subject: x86: validate against acpi motherboard resources This path adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table. The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional, the existing check needlessly disables MMCONFIG in these cases. In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here: - Validate all MMCONFIG configurations provided, not just the first one. - Validate the entire required length of each configuration according to the provided ending bus number is reserved, not just the minimum required allocation. - Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have. This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. [akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Greg KH Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 292491324b01..43a4f9cae67d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1053,5 +1053,13 @@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); +#ifdef CONFIG_PCI_MMCONFIG +extern void __init pci_mmcfg_early_init(int type); +extern void __init pci_mmcfg_late_init(void); +#else +static inline void pci_mmcfg_early_init(int type) { } +static inline void pci_mmcfg_late_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 57741a779070e0b141b6148136b420c8d35ccbce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:32:50 -0800 Subject: x86_64: set cfg_size for AMD Family 10h in case MMCONFIG reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID. Signed-off-by: Yinghai Lu Cc: Andrew Morton Acked-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/fixup.c | 17 +++++++++++++++++ drivers/pci/probe.c | 11 ++++++++++- include/linux/pci.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index a5ef5f551373..b60b2abd480c 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -493,3 +493,20 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); + +/* + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. Maybe we don't have a way to generate extended config space + * accesses. So check it + */ +static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +{ + dev->cfg_size = pci_cfg_space_size_ext(dev, 0); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f991359f0c36..a8efdaef1870 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,11 +842,14 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) { int pos; u32 status; + if (!check_exp_pcix) + goto skip; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); @@ -858,6 +861,7 @@ int pci_cfg_space_size(struct pci_dev *dev) goto fail; } + skip: if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) goto fail; if (status == 0xffffffff) @@ -869,6 +873,11 @@ int pci_cfg_space_size(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +int pci_cfg_space_size(struct pci_dev *dev) +{ + return pci_cfg_space_size_ext(dev, 1); +} + static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 43a4f9cae67d..2b8f74522f8f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -666,6 +666,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3 From bb63b4219976d48ed6d22ac33c18be334fb5a78c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Feb 2008 23:56:50 -0800 Subject: x86 pci: remove checking type for mmconfig probe doesn't need to check if it is type1 or type2, we can use raw_pci_ops directly. also make pci_direct_conf1 static again. anyway is there system with type 2 and mmconf support? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/direct.c | 8 +++++--- arch/x86/pci/init.c | 11 +++++------ arch/x86/pci/mmconfig-shared.c | 32 +++++++++++++++----------------- include/linux/pci.h | 4 ++-- 4 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 42f3e4cad179..21d1e0e0d535 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -258,7 +258,8 @@ void __init pci_direct_init(int type) { if (type == 0) return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); if (type == 1) raw_pci_ops = &pci_direct_conf1; else @@ -275,8 +276,10 @@ int __init pci_direct_probe(void) if (!region) goto type2; - if (pci_check_type1()) + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; return 1; + } release_resource(region); type2: @@ -290,7 +293,6 @@ int __init pci_direct_probe(void) goto fail2; if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; return 2; } diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 2080b04b3bcc..343c36337e69 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -6,14 +6,13 @@ in the right sequence from here. */ static __init int pci_access_init(void) { - int type __maybe_unused = 0; - #ifdef CONFIG_PCI_DIRECT + int type = 0; + type = pci_direct_probe(); #endif - pci_mmcfg_early_init(type); - if (raw_pci_ops) - return 0; + pci_mmcfg_early_init(); + #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif @@ -26,7 +25,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); #endif - if (!raw_pci_ops) + if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6f68658b519d..bdf62243186a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted; static const char __init *pci_mmcfg_e7520(void) { u32 win; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; if(win == 0x0000 || win == 0xf000) @@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void) pci_mmcfg_config_num = 1; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) @@ -179,6 +179,9 @@ static int __init pci_mmcfg_check_hostbridge(void) int i; const char *name; + if (!raw_pci_ops) + return 0; + pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; @@ -186,7 +189,7 @@ static int __init pci_mmcfg_check_hostbridge(void) for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; devfn = pci_mmcfg_probes[i].devfn; - pci_direct_conf1.read(0, bus, devfn, 0, 4, &l); + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); vendor = l & 0xffff; device = (l >> 16) & 0xffff; @@ -304,7 +307,7 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) return mcfg_res.flags; } -static void __init pci_mmcfg_reject_broken(int type, int early) +static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; int i; @@ -342,8 +345,8 @@ static void __init pci_mmcfg_reject_broken(int type, int early) " reserved in ACPI motherboard resources\n", cfg->address); /* Don't try to do this check unless configuration - type 1 is available. */ - if (type == 1 && e820_all_mapped(cfg->address, + type 1 is available. how about type 2 ?*/ + if (raw_pci_ops && e820_all_mapped(cfg->address, cfg->address + size - 1, E820_RESERVED)) { printk(KERN_NOTICE @@ -368,7 +371,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int type, int early) +void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) @@ -382,14 +385,14 @@ void __init __pci_mmcfg_init(int type, int early) if (known_bridge) return; - if (early && type == 1) { + if (early) { if (pci_mmcfg_check_hostbridge()) known_bridge = 1; } if (!known_bridge) { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type, early); + pci_mmcfg_reject_broken(early); } if ((pci_mmcfg_config_num == 0) || @@ -410,19 +413,14 @@ void __init __pci_mmcfg_init(int type, int early) } } -void __init pci_mmcfg_early_init(int type) +void __init pci_mmcfg_early_init(void) { - __pci_mmcfg_init(type, 1); + __pci_mmcfg_init(1); } void __init pci_mmcfg_late_init(void) { - int type = 0; - - if (pci_probe & PCI_PROBE_CONF1) - type = 1; - - __pci_mmcfg_init(type, 0); + __pci_mmcfg_init(0); } static int __init pci_mmcfg_late_insert_resources(void) diff --git a/include/linux/pci.h b/include/linux/pci.h index 2b8f74522f8f..a71954a38932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1055,10 +1055,10 @@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); #ifdef CONFIG_PCI_MMCONFIG -extern void __init pci_mmcfg_early_init(int type); +extern void __init pci_mmcfg_early_init(void); extern void __init pci_mmcfg_late_init(void); #else -static inline void pci_mmcfg_early_init(int type) { } +static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif -- cgit v1.2.3 From 871d5f8dd0f7647f03facd4cb79485938d1b61ab Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:20:09 -0800 Subject: x86: get mp_bus_to_node early Currently, on an amd k8 system with multi ht chains, the numa_node of pci devices under /sys/devices/pci0000:80/* is always 0, even if that chain is on node 1 or 2 or 3. Workaround: pcibus_to_node(bus) is used when we want to get the node that pci_device is on. In struct device, we already have numa_node member, and we could use dev_to_node()/set_dev_node() to get and set numa_node in the device. set_dev_node is called in pci_device_add() with pcibus_to_node(bus), and pcibus_to_node uses bus->sysdata for nodeid. The problem is when pci_add_device is called, bus->sysdata is not assigned correct nodeid yet. The result is that numa_node will always be 0. pcibios_scan_root and pci_scan_root could take sysdata. So we need to get mp_bus_to_node mapping before these two are called, and thus get_mp_bus_to_node could get correct node for sysdata in root bus. In scanning of the root bus, all child busses will take parent bus sysdata. So all pci_device->dev.numa_node will be assigned correctly and automatically. Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we could also could make other bus specific device get the correct numa_node too. This is an updated version of pci_sysdata and Jeff's pci_domain patch. [ mingo@elte.hu: build fix ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_32 | 1 + arch/x86/pci/acpi.c | 27 ++++++++----- arch/x86/pci/common.c | 18 +++++++-- arch/x86/pci/irq.c | 4 +- arch/x86/pci/k8-bus_64.c | 92 +++++++++++++++++++++++++++++++------------ arch/x86/pci/legacy.c | 4 +- arch/x86/pci/mp_bus_to_node.c | 23 +++++++++++ include/asm-x86/pci.h | 2 + include/asm-x86/topology.h | 13 ++++++ 9 files changed, 142 insertions(+), 42 deletions(-) create mode 100644 arch/x86/pci/mp_bus_to_node.c (limited to 'include') diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index cdd6828b5abb..e9c5caf54e59 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -10,5 +10,6 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o +pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2664cb3fc96c..1a9c0c6a1a18 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do { struct pci_bus *bus; struct pci_sysdata *sd; + int node; +#ifdef CONFIG_ACPI_NUMA int pxm; +#endif dmi_check_system(acpi_pciprobe_dmi_table); @@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do return NULL; } + node = -1; +#ifdef CONFIG_ACPI_NUMA + pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) + node = pxm_to_node(pxm); + if (node != -1) + set_mp_bus_to_node(busnum, node); + else + node = get_mp_bus_to_node(busnum); +#endif + /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. * It's arguable whether it's worth the trouble to care. @@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } sd->domain = domain; - sd->node = -1; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_ACPI_NUMA - if (pxm >= 0) - sd->node = pxm_to_node(pxm); -#endif + sd->node = node; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. @@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do kfree(sd); #ifdef CONFIG_ACPI_NUMA - if (bus != NULL) { + if (bus) { if (pxm >= 0) { - printk("bus %d -> pxm %d -> node %d\n", + printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", busnum, pxm, pxm_to_node(pxm)); } } @@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); - return bus; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 75fcc29ecf52..07d53184f7a4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return NULL; } + sd->node = get_mp_bus_to_node(busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + return bus; } extern u8 pci_cache_line_size; @@ -480,7 +485,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) +struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -495,10 +500,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); return NULL; } - sd->node = -1; - bus = pci_scan_bus(busno, &pci_root_ops, sd); + sd->node = node; + bus = pci_scan_bus(busno, ops, sd); if (!bus) kfree(sd); return bus; } + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + return pci_scan_bus_on_node(busno, &pci_root_ops, -1); +} diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 579745ca6b66..0908fca901bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void) busmap[e->bus] = 1; } for(i = 1; i < 256; i++) { + int node; if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus_with_sysdata(i)) + node = get_mp_bus_to_node(i); + if (pci_scan_bus_on_node(i, &pci_root_ops, node)) printk(KERN_INFO "PCI: Discovered primary peer " "bus %02x [IRQ]\n", i); } diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 9cc813e29706..3903efbca535 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include /* * This discovers the pcibus <-> node mapping on AMD K8. @@ -20,64 +22,102 @@ #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 +#ifdef CONFIG_NUMA + +#define BUS_NR 256 + +static int mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node = -1; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return node; + + node = mp_bus_to_node[busnum]; + + /* + * let numa_node_id to decide it later in dma_alloc_pages + * if there is no ram on that node + */ + if (node != -1 && !node_online(node)) + node = -1; + + return node; +} + +#endif + /** - * fill_mp_bus_to_cpumask() + * early_fill_mp_bus_to_node() + * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ __init static int -fill_mp_bus_to_cpumask(void) +early_fill_mp_bus_to_node(void) { - struct pci_dev *nb_dev = NULL; +#ifdef CONFIG_NUMA int i, j; + unsigned slot; u32 ldtbus, nid; + u32 id; static int lbnr[3] = { LDT_BUS_NUMBER_REGISTER_0, LDT_BUS_NUMBER_REGISTER_1, LDT_BUS_NUMBER_REGISTER_2 }; - while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { - pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); + for (i = 0; i < BUS_NR; i++) + mp_bus_to_node[i] = -1; + + if (!early_pci_allowed()) + return -1; + + for (slot = 0x18; slot < 0x20; slot++) { + id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + if (id != (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_K8HTCONFIG<<16))) + break; + nid = read_pci_config(0, slot, 0, NODE_ID_REGISTER); for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); + ldtbus = read_pci_config(0, slot, 0, lbnr[i]); /* * if there are no busses hanging off of the current * ldt link then both the secondary and subordinate * bus number fields are set to 0. - * + * * RED-PEN * This is slightly broken because it assumes - * HT node IDs == Linux node ids, which is not always + * HT node IDs == Linux node ids, which is not always * true. However it is probably mostly true. */ if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - j++) { - struct pci_bus *bus; - struct pci_sysdata *sd; - - long node = NODE_ID(nid); - /* Algorithm a bit dumb, but - it shouldn't matter here */ - bus = pci_find_bus(0, j); - if (!bus) - continue; - if (!node_online(node)) - node = 0; - - sd = bus->sysdata; - sd->node = node; - } + j++) { + int node = NODE_ID(nid); + mp_bus_to_node[j] = (unsigned char)node; + } } } } + for (i = 0; i < BUS_NR; i++) { + int node = mp_bus_to_node[i]; + if (node >= 0) + printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); + } +#endif return 0; } -fs_initcall(fill_mp_bus_to_cpumask); +postcore_initcall(early_fill_mp_bus_to_node); diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index e041ced0ce13..a67921ce60af 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -12,6 +12,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) { int n, devfn; + long node; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; @@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) u32 l; if (pci_find_bus(0, n)) continue; + node = get_mp_bus_to_node(n); for (devfn = 0; devfn < 256; devfn += 8) { if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_with_sysdata(n); + pci_scan_bus_on_node(n, &pci_root_ops, node); break; } } diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c new file mode 100644 index 000000000000..022943999b84 --- /dev/null +++ b/arch/x86/pci/mp_bus_to_node.c @@ -0,0 +1,23 @@ +#include +#include +#include + +#define BUS_NR 256 + +static unsigned char mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = (unsigned char) node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return 0; + node = mp_bus_to_node[busnum]; + return node; +} diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index ddd8e248fc0a..30bbde0cb34b 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -19,6 +19,8 @@ struct pci_sysdata { }; /* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, + int node); extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); static inline int pci_domain_nr(struct pci_bus *bus) diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 22073268b481..4793ae745a78 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -198,4 +198,17 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define smt_capable() (smp_num_siblings > 1) #endif +#ifdef CONFIG_NUMA +extern int get_mp_bus_to_node(int busnum); +extern void set_mp_bus_to_node(int busnum, int node); +#else +static inline int get_mp_bus_to_node(int busnum) +{ + return 0; +} +static inline void set_mp_bus_to_node(int busnum, int node) +{ +} +#endif + #endif -- cgit v1.2.3 From 30a18d6c3f1e774de656ebd8ff219d53e2ba4029 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:20 -0800 Subject: x86: multi pci root bus with different io resource range, on 64-bit scan AMD opteron io/mmio routing to make sure every pci root bus get correct resource range. Thus later pci scan could assign correct resource to device with unassigned resource. this can fix a system without _CRS for multi pci root bus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_64 | 2 +- arch/x86/pci/k8-bus_64.c | 404 +++++++++++++++++++++++++++++++++++++++------ drivers/pci/probe.c | 6 + include/asm-x86/topology.h | 3 + include/linux/pci.h | 2 +- 5 files changed, 365 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf143..8fbd19832cf6 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index dab38310ee97..5e8a9d105edd 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -7,23 +7,29 @@ /* * This discovers the pcibus <-> node mapping on AMD K8. - * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID(dword) ((dword>>4) & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0xE0 -#define LDT_BUS_NUMBER_REGISTER_1 0xE4 -#define LDT_BUS_NUMBER_REGISTER_2 0xE8 -#define LDT_BUS_NUMBER_REGISTER_3 0xEC -#define NR_LDT_BUS_NUMBER_REGISTERS 4 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 24) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 -#define PCI_DEVICE_ID_K8_10H_HTCONFIG 0x1200 -#define PCI_DEVICE_ID_K8_11H_HTCONFIG 0x1300 +/* + * sub bus (transparent) will use entres from 3 to store extra from root, + * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; #ifdef CONFIG_NUMA @@ -55,77 +61,375 @@ int get_mp_bus_to_node(int busnum) return node; } - #endif +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + if (!pci_root_num) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + if (start == range[j].start && end < range[j].end) { + range[j].start = end + 1; + break; + } else if (start == range[j].start && end == range[j].end) { + range[j].start = 0; + range[j].end = 0; + break; + } else if (start > range[j].start && end == range[j].end) { + range[j].end = start - 1; + break; + } else if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + break; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + if (res->flags != flags) + continue; + if (res->end + 1 == start) { + res->end = end; + return; + } else if (end + 1 == res->start) { + res->start = start; + return; + } + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init static int -early_fill_mp_bus_to_node(void) +static int __init early_fill_mp_bus_info(void) { -#ifdef CONFIG_NUMA - int i, j; + int i; + int j; + unsigned bus; unsigned slot; - u32 ldtbus; - u32 id; + int found; int node; - u16 deviceid; - u16 vendorid; - int min_bus; - int max_bus; - - static int lbnr[NR_LDT_BUS_NUMBER_REGISTERS] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2, - LDT_BUS_NUMBER_REGISTER_3 - }; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) mp_bus_to_node[i] = -1; +#endif if (!early_pci_allowed()) return -1; - slot = 0x18; - id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; - vendorid = id & 0xffff; - if (vendorid != PCI_VENDOR_ID_AMD) - goto out; + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); - deviceid = (id>>16) & 0xffff; - if ((deviceid != PCI_DEVICE_ID_K8HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_10H_HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_11H_HTCONFIG)) - goto out; + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - ldtbus = read_pci_config(0, slot, 1, lbnr[i]); + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); /* Check if that register is enabled for bus range */ - if ((ldtbus & 7) != 3) + if ((reg & 7) != 3) continue; - min_bus = SECONDARY_LDT_BUS_NUMBER(ldtbus); - max_bus = SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - node = NODE_ID(ldtbus); + min_bus = (reg >> 16) & 0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA for (j = min_bus; j <= max_bus; j++) mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; } -out: + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_IO, 0); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + /* 0xfc00000000-0xfcffffffff for Family 10h mmconfig*/ + range[0].end = 0xfbffffffffULL; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_MEM, 0); + update_range(range, start, end); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? + * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) { node = mp_bus_to_node[i]; if (node >= 0) printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); } #endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } + } + return 0; } -postcore_initcall(early_fill_mp_bus_to_node); +postcore_initcall(early_fill_mp_bus_info); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a40043bd3257..4a55bf380957 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1088,6 +1088,10 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1147,6 +1151,8 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; + set_pci_bus_resources_arch_default(b); + return b; dev_create_file_err: diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 4793ae745a78..0e6d6b03affe 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -193,6 +193,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + #ifdef CONFIG_SMP #define mc_capable() (boot_cpu_data.x86_max_cores > 1) #define smt_capable() (smp_num_siblings > 1) diff --git a/include/linux/pci.h b/include/linux/pci.h index a71954a38932..abc998ffb66e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -254,7 +254,7 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 16 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ -- cgit v1.2.3 From cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:06 -0800 Subject: acpi: get boot_cpu_id as early for k8_scan_nodes [mingo@elte.hu: split from "x86_64: get boot_cpu_id as early for k8_scan_nodes] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/k8topology_64.c | 38 +++++++++++++++++++++++- include/linux/acpi.h | 5 ++++ 3 files changed, 112 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 977ed5cdeaa3..c49ebcc6c41e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -771,6 +771,32 @@ static void __init acpi_register_lapic_address(unsigned long address) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } +static int __init early_acpi_parse_madt_lapic_addr_ovr(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + acpi_register_lapic_address(acpi_lapic_addr); + + return count; +} + static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -901,6 +927,33 @@ static inline int acpi_parse_madt_ioapic_entries(void) } #endif /* !CONFIG_X86_IO_APIC */ +static void __init early_acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = early_acpi_parse_madt_lapic_addr_ovr(); + if (!error) { + acpi_lapic = 1; + smp_found_config = 1; + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. + */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif +} + static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1233,6 +1286,23 @@ int __init acpi_boot_table_init(void) return 0; } +int __init early_acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + early_acpi_process_madt(); + + return 0; +} + int __init acpi_boot_init(void) { /* diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 86808e666f9c..1f476e477844 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -13,12 +13,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include static __init int find_northbridge(void) { @@ -44,6 +47,30 @@ static __init int find_northbridge(void) return -1; } +static __init void early_get_boot_cpu_id(void) +{ + /* + * need to get boot_cpu_id so can use that to create apicid_to_node + * in k8_scan_nodes() + */ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); +#ifdef CONFIG_ACPI + /* + * Read APIC information from ACPI tables. + */ + early_acpi_boot_init(); +#endif + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); + early_init_lapic_mapping(); +} + int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; @@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned cores; unsigned bits; int j; + unsigned apicid_base; if (!early_pci_allowed()) return -1; @@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) /* use the coreid bits from early_identify_cpu */ bits = boot_cpu_data.x86_coreid_bits; cores = (1< 0) { + printk(KERN_INFO "BSP APIC ID: %02x\n", + boot_cpu_physical_apicid); + apicid_base = boot_cpu_physical_apicid; + } for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { nodeid = nodeids[i]; - for (j = 0; j < cores; j++) + for (j = apicid_base; j < cores + apicid_base; j++) apicid_to_node[(nodeid << bits) + j] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2c7e003356ac..41f7ce7edd7a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); int acpi_numa_init (void); @@ -235,6 +236,10 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #else /* CONFIG_ACPI */ +static inline int early_acpi_boot_init(void) +{ + return 0; +} static inline int acpi_boot_init(void) { return 0; -- cgit v1.2.3