diff options
Diffstat (limited to 'drivers/iommu/amd_iommu.c')
-rw-r--r-- | drivers/iommu/amd_iommu.c | 545 |
1 files changed, 535 insertions, 10 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6d1cbdfc9b2a..55074cba20eb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -31,6 +31,12 @@ #include <linux/amd-iommu.h> #include <linux/notifier.h> #include <linux/export.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <asm/irq_remapping.h> +#include <asm/io_apic.h> +#include <asm/apic.h> +#include <asm/hw_irq.h> #include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> @@ -39,6 +45,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -72,6 +79,9 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock); static LIST_HEAD(dev_data_list); static DEFINE_SPINLOCK(dev_data_list_lock); +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -92,6 +102,8 @@ struct iommu_cmd { u32 data[4]; }; +struct kmem_cache *amd_iommu_irq_cache; + static void update_domain(struct protection_domain *domain); static int __init alloc_passthrough_domain(void); @@ -266,7 +278,7 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) static int iommu_init_device(struct device *dev) { - struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev); + struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; struct iommu_group *group; u16 alias; @@ -293,11 +305,18 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } else + } + + if (dma_pdev == NULL) dma_pdev = pci_dev_get(pdev); + /* Account for quirked devices */ swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as function 0. + */ if (dma_pdev->multifunction && !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) swap_pci_ref(&dma_pdev, @@ -305,14 +324,28 @@ static int iommu_init_device(struct device *dev) PCI_DEVFN(PCI_SLOT(dma_pdev->devfn), 0))); + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ while (!pci_is_root_bus(dma_pdev->bus)) { - if (pci_acs_path_enabled(dma_pdev->bus->self, - NULL, REQ_ACS_FLAGS)) + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; - swap_pci_ref(&dma_pdev, pci_dev_get(dma_pdev->bus->self)); + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } +root_bus: group = iommu_group_get(&dma_pdev->dev); pci_dev_put(dma_pdev); if (!group) { @@ -665,7 +698,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) /* * Release iommu->lock because ppr-handling might need to - * re-aquire it + * re-acquire it */ spin_unlock_irqrestore(&iommu->lock, flags); @@ -783,7 +816,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); if (s) /* size bit - we flush more than one 4kb page */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -878,6 +911,13 @@ static void build_inv_all(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_INV_ALL); } +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. @@ -999,12 +1039,32 @@ static void iommu_flush_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { iommu_flush_all(iommu); } else { iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); iommu_flush_tlb_all(iommu); } } @@ -2134,7 +2194,7 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) } /* - * If a device is not yet associated with a domain, this function does + * If a device is not yet associated with a domain, this function * assigns it visible for the hardware */ static int attach_device(struct device *dev, @@ -2384,7 +2444,7 @@ static struct protection_domain *get_domain(struct device *dev) if (domain != NULL) return domain; - /* Device not bount yet - bind it */ + /* Device not bound yet - bind it */ dma_dom = find_protection_domain(devid); if (!dma_dom) dma_dom = amd_iommu_rlookup_table[devid]->default_dom; @@ -2923,7 +2983,7 @@ static void __init prealloc_protection_domains(void) alloc_passthrough_domain(); dev_data->passthrough = true; attach_device(&dev->dev, pt_domain); - pr_info("AMD-Vi: Using passthough domain for device %s\n", + pr_info("AMD-Vi: Using passthrough domain for device %s\n", dev_name(&dev->dev)); } @@ -3295,6 +3355,8 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return 1; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled; } return 0; @@ -3722,3 +3784,466 @@ int amd_iommu_device_info(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + *****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + table = NULL; + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_iommu *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + irte_info = &cfg->irq_2_iommu; + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_iommu *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = data->chip_data; + irq = data->irq; + irte_info = &cfg->irq_2_iommu; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + + free_irte(irte_info->sub_handle, irte_info->irte_index); + + return 0; +} + +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_iommu; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->irte_index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_iommu; + + irte_info->sub_handle = devid; + irte_info->irte_index = index + offset; + irte_info->iommu = (void *)cfg; + + return 0; +} + +static int setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + return 0; +} + +struct irq_remap_ops amd_iommu_irq_ops = { + .supported = amd_iommu_supported, + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .setup_hpet_msi = setup_hpet_msi, +}; +#endif |