diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Makefile | 25 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 25 | ||||
-rw-r--r-- | drivers/xen/events.c | 152 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 2 | ||||
-rw-r--r-- | drivers/xen/gntalloc.c | 14 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 16 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 31 | ||||
-rw-r--r-- | drivers/xen/manage.c | 8 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 2 | ||||
-rw-r--r-- | drivers/xen/sys-hypervisor.c | 2 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 264 |
11 files changed, 449 insertions, 92 deletions
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f420f1ff7f1..bbc18258ecc 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,24 +1,25 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ +obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) -obj-$(CONFIG_BLOCK) += biomerge.o -obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-$(CONFIG_XEN_XENCOMM) += xencomm.o -obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o -obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o -obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o +obj-$(CONFIG_BLOCK) += biomerge.o +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o +obj-$(CONFIG_XEN_XENCOMM) += xencomm.o +obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o +obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o +obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o -obj-$(CONFIG_XENFS) += xenfs/ +obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o -obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o -obj-$(CONFIG_XEN_DOM0) += pci.o +obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pci.o -xen-evtchn-y := evtchn.o +xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o -xen-platform-pci-y := platform-pci.o +xen-platform-pci-y := platform-pci.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 043af8ad6b6..f54290baa3d 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -114,7 +114,6 @@ static void __balloon_append(struct page *page) if (PageHighMem(page)) { list_add_tail(&page->lru, &ballooned_pages); balloon_stats.balloon_high++; - dec_totalhigh_pages(); } else { list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; @@ -124,6 +123,8 @@ static void __balloon_append(struct page *page) static void balloon_append(struct page *page) { __balloon_append(page); + if (PageHighMem(page)) + dec_totalhigh_pages(); totalram_pages--; } @@ -193,7 +194,7 @@ static enum bp_state update_schedule(enum bp_state state) return BP_EAGAIN; } -static unsigned long current_target(void) +static long current_credit(void) { unsigned long target = balloon_stats.target_pages; @@ -202,7 +203,7 @@ static unsigned long current_target(void) balloon_stats.balloon_low + balloon_stats.balloon_high); - return target; + return target - balloon_stats.current_pages; } static enum bp_state increase_reservation(unsigned long nr_pages) @@ -246,7 +247,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages) set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. */ - if (!xen_hvm_domain() && pfn < max_low_pfn) { + if (xen_pv_domain() && !PageHighMem(page)) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -293,7 +294,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) scrub_page(page); - if (!xen_hvm_domain() && !PageHighMem(page)) { + if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); @@ -337,7 +338,7 @@ static void balloon_process(struct work_struct *work) mutex_lock(&balloon_mutex); do { - credit = current_target() - balloon_stats.current_pages; + credit = current_credit(); if (credit > 0) state = increase_reservation(credit); @@ -420,7 +421,7 @@ void free_xenballooned_pages(int nr_pages, struct page** pages) } /* The balloon may be too large now. Shrink it if needed. */ - if (current_target() != balloon_stats.current_pages) + if (current_credit()) schedule_delayed_work(&balloon_worker, 0); mutex_unlock(&balloon_mutex); @@ -429,7 +430,7 @@ EXPORT_SYMBOL(free_xenballooned_pages); static int __init balloon_init(void) { - unsigned long pfn, nr_pages, extra_pfn_end; + unsigned long pfn, extra_pfn_end; struct page *page; if (!xen_domain()) @@ -437,11 +438,7 @@ static int __init balloon_init(void) pr_info("xen/balloon: Initialising balloon driver.\n"); - if (xen_pv_domain()) - nr_pages = xen_start_info->nr_pages; - else - nr_pages = max_pfn; - balloon_stats.current_pages = min(nr_pages, max_pfn); + balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -466,7 +463,7 @@ static int __init balloon_init(void) pfn < extra_pfn_end; pfn++) { page = pfn_to_page(pfn); - /* totalram_pages doesn't include the boot-time + /* totalram_pages and totalhigh_pages do not include the boot-time balloon extension, so don't subtract from it. */ __balloon_append(page); } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 33167b43ac7..553da68bd51 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -101,6 +101,7 @@ struct irq_info unsigned short gsi; unsigned char vector; unsigned char flags; + uint16_t domid; } pirq; } u; }; @@ -118,6 +119,8 @@ static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; +static void enable_dynirq(struct irq_data *data); +static void disable_dynirq(struct irq_data *data); /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) @@ -184,6 +187,7 @@ static void xen_irq_info_pirq_init(unsigned irq, unsigned short pirq, unsigned short gsi, unsigned short vector, + uint16_t domid, unsigned char flags) { struct irq_info *info = info_for_irq(irq); @@ -193,6 +197,7 @@ static void xen_irq_info_pirq_init(unsigned irq, info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.vector = vector; + info->u.pirq.domid = domid; info->u.pirq.flags = flags; } @@ -473,16 +478,6 @@ static void xen_free_irq(unsigned irq) irq_free_desc(irq); } -static void pirq_unmask_notify(int irq) -{ - struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; - - if (unlikely(pirq_needs_eoi(irq))) { - int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); - WARN_ON(rc); - } -} - static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@ -506,6 +501,29 @@ static bool probing_irq(int irq) return desc && desc->action == NULL; } +static void eoi_pirq(struct irq_data *data) +{ + int evtchn = evtchn_from_irq(data->irq); + struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; + int rc = 0; + + irq_move_irq(data); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); + + if (pirq_needs_eoi(data->irq)) { + rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + WARN_ON(rc); + } +} + +static void mask_ack_pirq(struct irq_data *data) +{ + disable_dynirq(data); + eoi_pirq(data); +} + static unsigned int __startup_pirq(unsigned int irq) { struct evtchn_bind_pirq bind_pirq; @@ -539,7 +557,7 @@ static unsigned int __startup_pirq(unsigned int irq) out: unmask_evtchn(evtchn); - pirq_unmask_notify(irq); + eoi_pirq(irq_get_irq_data(irq)); return 0; } @@ -579,18 +597,7 @@ static void enable_pirq(struct irq_data *data) static void disable_pirq(struct irq_data *data) { -} - -static void ack_pirq(struct irq_data *data) -{ - int evtchn = evtchn_from_irq(data->irq); - - irq_move_irq(data); - - if (VALID_EVTCHN(evtchn)) { - mask_evtchn(evtchn); - clear_evtchn(evtchn); - } + disable_dynirq(data); } static int find_irq_by_gsi(unsigned gsi) @@ -619,6 +626,9 @@ int xen_allocate_pirq_gsi(unsigned gsi) * * Note: We don't assign an event channel until the irq actually started * up. Return an existing irq if we've already got one for the gsi. + * + * Shareable implies level triggered, not shareable implies edge + * triggered here. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) @@ -639,9 +649,6 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, - name); - irq_op.irq = irq; irq_op.vector = 0; @@ -655,9 +662,32 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, goto out; } - xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, + xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); + pirq_query_unmask(irq); + /* We try to use the handler with the appropriate semantic for the + * type of interrupt: if the interrupt is an edge triggered + * interrupt we use handle_edge_irq. + * + * On the other hand if the interrupt is level triggered we use + * handle_fasteoi_irq like the native code does for this kind of + * interrupts. + * + * Depending on the Xen version, pirq_needs_eoi might return true + * not only for level triggered interrupts but for edge triggered + * interrupts too. In any case Xen always honors the eoi mechanism, + * not injecting any more pirqs of the same kind if the first one + * hasn't received an eoi yet. Therefore using the fasteoi handler + * is the right choice either way. + */ + if (shareable) + irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + handle_fasteoi_irq, name); + else + irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + handle_edge_irq, name); + out: spin_unlock(&irq_mapping_update_lock); @@ -680,7 +710,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) } int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name) + int pirq, int vector, const char *name, + domid_t domid) { int irq, ret; @@ -690,10 +721,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (irq == -1) goto out; - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, - name); + irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, + name); - xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0); + xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); ret = irq_set_msi_desc(irq, msidesc); if (ret < 0) goto error_irq; @@ -722,9 +753,16 @@ int xen_destroy_irq(int irq) if (xen_initial_domain()) { unmap_irq.pirq = info->u.pirq.pirq; - unmap_irq.domid = DOMID_SELF; + unmap_irq.domid = info->u.pirq.domid; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); - if (rc) { + /* If another domain quits without making the pci_disable_msix + * call, the Xen hypervisor takes care of freeing the PIRQs + * (free_domain_pirqs). + */ + if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) + printk(KERN_INFO "domain %d does not have %d anymore\n", + info->u.pirq.domid, info->u.pirq.pirq); + else if (rc) { printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } @@ -759,6 +797,12 @@ out: return irq; } + +int xen_pirq_from_irq(unsigned irq) +{ + return pirq_from_irq(irq); +} +EXPORT_SYMBOL_GPL(xen_pirq_from_irq); int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -773,7 +817,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) goto out; irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_fasteoi_irq, "event"); + handle_edge_irq, "event"); xen_irq_info_evtchn_init(irq, evtchn); } @@ -1179,9 +1223,6 @@ static void __xen_evtchn_do_upcall(void) port = (word_idx * BITS_PER_LONG) + bit_idx; irq = evtchn_to_irq[port]; - mask_evtchn(port); - clear_evtchn(port); - if (irq != -1) { desc = irq_to_desc(irq); if (desc) @@ -1337,10 +1378,16 @@ static void ack_dynirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); - irq_move_masked_irq(data); + irq_move_irq(data); if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); + clear_evtchn(evtchn); +} + +static void mask_ack_dynirq(struct irq_data *data) +{ + disable_dynirq(data); + ack_dynirq(data); } static int retrigger_dynirq(struct irq_data *data) @@ -1502,6 +1549,18 @@ void xen_poll_irq(int irq) xen_poll_irq_timeout(irq, 0 /* no timeout */); } +/* Check whether the IRQ line is shared with other guests. */ +int xen_test_irq_shared(int irq) +{ + struct irq_info *info = info_for_irq(irq); + struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; + + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + return 0; + return !(irq_status.flags & XENIRQSTAT_shared); +} +EXPORT_SYMBOL_GPL(xen_test_irq_shared); + void xen_irq_resume(void) { unsigned int cpu, evtchn; @@ -1535,7 +1594,9 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_mask = disable_dynirq, .irq_unmask = enable_dynirq, - .irq_eoi = ack_dynirq, + .irq_ack = ack_dynirq, + .irq_mask_ack = mask_ack_dynirq, + .irq_set_affinity = set_affinity_irq, .irq_retrigger = retrigger_dynirq, }; @@ -1545,14 +1606,15 @@ static struct irq_chip xen_pirq_chip __read_mostly = { .irq_startup = startup_pirq, .irq_shutdown = shutdown_pirq, - .irq_enable = enable_pirq, - .irq_unmask = enable_pirq, - .irq_disable = disable_pirq, - .irq_mask = disable_pirq, - .irq_ack = ack_pirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = eoi_pirq, + .irq_eoi = eoi_pirq, + .irq_mask_ack = mask_ack_pirq, .irq_set_affinity = set_affinity_irq, diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index ef11daf0caf..dbc13e94b61 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -470,7 +470,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) filp->private_data = u; - return nonseekable_open(inode, filp);; + return nonseekable_open(inode, filp); } static int evtchn_release(struct inode *inode, struct file *filp) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index a7ffdfe19fc..f6832f46aea 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -427,6 +427,17 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, return 0; } +static void gntalloc_vma_open(struct vm_area_struct *vma) +{ + struct gntalloc_gref *gref = vma->vm_private_data; + if (!gref) + return; + + spin_lock(&gref_lock); + gref->users++; + spin_unlock(&gref_lock); +} + static void gntalloc_vma_close(struct vm_area_struct *vma) { struct gntalloc_gref *gref = vma->vm_private_data; @@ -441,6 +452,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) } static struct vm_operations_struct gntalloc_vmops = { + .open = gntalloc_vma_open, .close = gntalloc_vma_close, }; @@ -471,8 +483,6 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = gref; vma->vm_flags |= VM_RESERVED; - vma->vm_flags |= VM_DONTCOPY; - vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; vma->vm_ops = &gntalloc_vmops; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b0f9e8fb005..f914b26cf0c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -330,17 +330,26 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) /* ------------------------------------------------------------------ */ +static void gntdev_vma_open(struct vm_area_struct *vma) +{ + struct grant_map *map = vma->vm_private_data; + + pr_debug("gntdev_vma_open %p\n", vma); + atomic_inc(&map->users); +} + static void gntdev_vma_close(struct vm_area_struct *vma) { struct grant_map *map = vma->vm_private_data; - pr_debug("close %p\n", vma); + pr_debug("gntdev_vma_close %p\n", vma); map->vma = NULL; vma->vm_private_data = NULL; gntdev_put_map(map); } static struct vm_operations_struct gntdev_vmops = { + .open = gntdev_vma_open, .close = gntdev_vma_close, }; @@ -652,7 +661,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; + vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; + + if (use_ptemod) + vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP; vma->vm_private_data = map; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3745a318def..fd725cde6ad 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (map_ops[i].status) continue; - /* m2p override only supported for GNTMAP_contains_pte mappings */ - if (!(map_ops[i].flags & GNTMAP_contains_pte)) - continue; - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + (map_ops[i].host_addr & ~PAGE_MASK)); - mfn = pte_mfn(*pte); - ret = m2p_add_override(mfn, pages[i]); + mfn = pte_mfn(*pte); + } else { + /* If you really wanted to do this: + * mfn = PFN_DOWN(map_ops[i].dev_bus_addr); + * + * The reason we do not implement it is b/c on the + * unmap path (gnttab_unmap_refs) we have no means of + * checking whether the page is !GNTMAP_contains_pte. + * + * That is without some extra data-structure to carry + * the struct page, bool clear_pte, and list_head next + * tuples and deal with allocation/delallocation, etc. + * + * The users of this API set the GNTMAP_contains_pte + * flag so lets just return not supported until it + * becomes neccessary to implement. + */ + return -EOPNOTSUPP; + } + ret = m2p_add_override(mfn, pages[i], + map_ops[i].flags & GNTMAP_contains_pte); if (ret) return ret; } @@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i]); + ret = m2p_remove_override(pages[i], true /* clear the PTE */); if (ret) return ret; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index a2eee574784..0b5366b5be2 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -70,12 +70,7 @@ static int xen_suspend(void *data) BUG_ON(!irqs_disabled()); - err = sysdev_suspend(PMSG_FREEZE); - if (!err) { - err = syscore_suspend(); - if (err) - sysdev_resume(); - } + err = syscore_suspend(); if (err) { printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n", err); @@ -102,7 +97,6 @@ static int xen_suspend(void *data) } syscore_resume(); - sysdev_resume(); return 0; } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 5ac3f4843f5..6e8c15a2320 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -54,7 +54,7 @@ u64 start_dma_addr; static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { - return phys_to_machine(XPADDR(paddr)).maddr;; + return phys_to_machine(XPADDR(paddr)).maddr; } static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 60f1827a32c..1e0fe01eb67 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -215,7 +215,7 @@ static struct attribute_group xen_compilation_group = { .attrs = xen_compile_attrs, }; -int __init static xen_compilation_init(void) +static int __init xen_compilation_init(void) { return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); } diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c new file mode 100644 index 00000000000..816a44959ef --- /dev/null +++ b/drivers/xen/tmem.c @@ -0,0 +1,264 @@ +/* + * Xen implementation for transcendent memory (tmem) + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/cleancache.h> + +#include <xen/xen.h> +#include <xen/interface/xen.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/page.h> +#include <asm/xen/hypervisor.h> + +#define TMEM_CONTROL 0 +#define TMEM_NEW_POOL 1 +#define TMEM_DESTROY_POOL 2 +#define TMEM_NEW_PAGE 3 +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#define TMEM_READ 8 +#define TMEM_WRITE 9 +#define TMEM_XCHG 10 + +/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_VERSION_SHIFT 24 + + +struct tmem_pool_uuid { + u64 uuid_lo; + u64 uuid_hi; +}; + +struct tmem_oid { + u64 oid[3]; +}; + +#define TMEM_POOL_PRIVATE_UUID { 0, 0 } + +/* flags for tmem_ops.new_pool */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 + +/* xen tmem foundation ops/hypercalls */ + +static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, + u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) +{ + struct tmem_op op; + int rc = 0; + + op.cmd = tmem_cmd; + op.pool_id = tmem_pool; + op.u.gen.oid[0] = oid.oid[0]; + op.u.gen.oid[1] = oid.oid[1]; + op.u.gen.oid[2] = oid.oid[2]; + op.u.gen.index = index; + op.u.gen.tmem_offset = tmem_offset; + op.u.gen.pfn_offset = pfn_offset; + op.u.gen.len = len; + set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); + rc = HYPERVISOR_tmem_op(&op); + return rc; +} + +static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, + u32 flags, unsigned long pagesize) +{ + struct tmem_op op; + int rc = 0, pageshift; + + for (pageshift = 0; pagesize != 1; pageshift++) + pagesize >>= 1; + flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; + flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; + op.cmd = TMEM_NEW_POOL; + op.u.new.uuid[0] = uuid.uuid_lo; + op.u.new.uuid[1] = uuid.uuid_hi; + op.u.new.flags = flags; + rc = HYPERVISOR_tmem_op(&op); + return rc; +} + +/* xen generic tmem ops */ + +static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, + u32 index, unsigned long pfn) +{ + unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; + + return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, + gmfn, 0, 0, 0); +} + +static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, + u32 index, unsigned long pfn) +{ + unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; + + return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, + gmfn, 0, 0, 0); +} + +static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) +{ + return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, + 0, 0, 0, 0); +} + +static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) +{ + return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); +} + +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + +int tmem_enabled; + +static int __init enable_tmem(char *s) +{ + tmem_enabled = 1; + return 1; +} + +__setup("tmem", enable_tmem); + +/* cleancache ops */ + +static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + unsigned long pfn = page_to_pfn(page); + + if (pool < 0) + return; + if (ind != index) + return; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); +} + +static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + unsigned long pfn = page_to_pfn(page); + int ret; + + /* translate return values to linux semantics */ + if (pool < 0) + return -1; + if (ind != index) + return -1; + ret = xen_tmem_get_page((u32)pool, oid, ind, pfn); + if (ret == 1) + return 0; + else + return -1; +} + +static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, + pgoff_t index) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (pool < 0) + return; + if (ind != index) + return; + (void)xen_tmem_flush_page((u32)pool, oid, ind); +} + +static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) +{ + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (pool < 0) + return; + (void)xen_tmem_flush_object((u32)pool, oid); +} + +static void tmem_cleancache_flush_fs(int pool) +{ + if (pool < 0) + return; + (void)xen_tmem_destroy_pool((u32)pool); +} + +static int tmem_cleancache_init_fs(size_t pagesize) +{ + struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; + + return xen_tmem_new_pool(uuid_private, 0, pagesize); +} + +static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) +{ + struct tmem_pool_uuid shared_uuid; + + shared_uuid.uuid_lo = *(u64 *)uuid; + shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); + return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); +} + +static int use_cleancache = 1; + +static int __init no_cleancache(char *s) +{ + use_cleancache = 0; + return 1; +} + +__setup("nocleancache", no_cleancache); + +static struct cleancache_ops tmem_cleancache_ops = { + .put_page = tmem_cleancache_put_page, + .get_page = tmem_cleancache_get_page, + .flush_page = tmem_cleancache_flush_page, + .flush_inode = tmem_cleancache_flush_inode, + .flush_fs = tmem_cleancache_flush_fs, + .init_shared_fs = tmem_cleancache_init_shared_fs, + .init_fs = tmem_cleancache_init_fs +}; + +static int __init xen_tmem_init(void) +{ + struct cleancache_ops old_ops; + + if (!xen_domain()) + return 0; +#ifdef CONFIG_CLEANCACHE + BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); + if (tmem_enabled && use_cleancache) { + char *s = ""; + old_ops = cleancache_register_ops(&tmem_cleancache_ops); + if (old_ops.init_fs != NULL) + s = " (WARNING: cleancache_ops overridden)"; + printk(KERN_INFO "cleancache enabled, RAM provided by " + "Xen Transcendent Memory%s\n", s); + } +#endif + return 0; +} + +module_init(xen_tmem_init) |