summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-02-09 14:34:03 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2017-02-09 14:34:03 +1100
commit2d78974a9e2322ceaa10b34785191ce3c6a6c7b9 (patch)
treef4fbc07b8689c41354db196af373dcce0585ddee
parent890b787db3edaf265e25db4e25b5375eb1a7cdf0 (diff)
parentc0d197d55e8e8aeeea55f79bdf67e1c957bfa25d (diff)
Merge remote-tracking branch 'xen-tip/linux-next'
-rw-r--r--arch/x86/xen/Kconfig2
-rw-r--r--arch/x86/xen/Makefile1
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten.c279
-rw-r--r--arch/x86/xen/mmu.c21
-rw-r--r--arch/x86/xen/platform-pci-unplug.c4
-rw-r--r--arch/x86/xen/setup.c37
-rw-r--r--arch/x86/xen/smp.c78
-rw-r--r--arch/x86/xen/smp.h8
-rw-r--r--arch/x86/xen/xen-head.S62
-rw-r--r--arch/x86/xen/xen-ops.h1
-rw-r--r--arch/x86/xen/xen-pvh.S161
-rw-r--r--drivers/net/xen-netback/netback.c6
-rw-r--r--drivers/net/xen-netfront.c6
-rw-r--r--drivers/xen/cpu_hotplug.c2
-rw-r--r--drivers/xen/events/events_base.c1
-rw-r--r--drivers/xen/grant-table.c8
-rw-r--r--drivers/xen/manage.c2
-rw-r--r--drivers/xen/xenbus/xenbus_client.c39
-rw-r--r--include/xen/interface/elfnote.h12
-rw-r--r--include/xen/interface/hvm/hvm_vcpu.h143
-rw-r--r--include/xen/interface/hvm/start_info.h98
-rw-r--r--include/xen/xen.h12
23 files changed, 627 insertions, 358 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c7b15f3e2cf3..76b6dbd627df 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -53,5 +53,5 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
- depends on X86_64 && XEN && XEN_PVHVM
+ depends on XEN && XEN_PVHVM && ACPI
def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e52787d32..cb0164aee156 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
obj-$(CONFIG_XEN_EFI) += efi.o
+obj-$(CONFIG_XEN_PVH) += xen-pvh.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 44c88ad1841a..bcea81f36fc5 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -145,7 +145,7 @@ static void xen_silent_inquire(int apicid)
static int xen_cpu_present_to_apicid(int cpu)
{
if (cpu_present(cpu))
- return xen_get_apic_id(xen_apic_read(APIC_ID));
+ return cpu_data(cpu).apicid;
else
return BAD_APICID;
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..ec1d5c46e58f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
#include <xen/interface/memory.h>
#include <xen/interface/nmi.h>
#include <xen/interface/xen-mca.h>
+#include <xen/interface/hvm/start_info.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvm.h>
@@ -176,6 +177,20 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+#ifdef CONFIG_XEN_PVH
+/*
+ * PVH variables.
+ *
+ * xen_pvh and pvh_bootparams need to live in data segment since they
+ * are used after startup_{32|64}, which clear .bss, are invoked.
+ */
+bool xen_pvh __attribute__((section(".data"))) = 0;
+struct boot_params pvh_bootparams __attribute__((section(".data")));
+
+struct hvm_start_info pvh_start_info;
+unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
+#endif
+
static void clamp_max_cpus(void)
{
#ifdef CONFIG_SMP
@@ -1138,10 +1153,11 @@ void xen_setup_vcpu_info_placement(void)
xen_vcpu_setup(cpu);
}
- /* xen_vcpu_setup managed to place the vcpu_info within the
- * percpu area for all cpus, so make use of it. Note that for
- * PVH we want to use native IRQ mechanism. */
- if (have_vcpu_info_placement && !xen_pvh_domain()) {
+ /*
+ * xen_vcpu_setup managed to place the vcpu_info within the
+ * percpu area for all cpus, so make use of it.
+ */
+ if (have_vcpu_info_placement) {
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1413,49 +1429,9 @@ static void __init xen_boot_params_init_edd(void)
* Set up the GDT and segment registers for -fstack-protector. Until
* we do this, we have to be careful not to call any stack-protected
* function, which is most of the kernel.
- *
- * Note, that it is __ref because the only caller of this after init
- * is PVH which is not going to use xen_load_gdt_boot or other
- * __init functions.
*/
-static void __ref xen_setup_gdt(int cpu)
+static void xen_setup_gdt(int cpu)
{
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_X86_64
- unsigned long dummy;
-
- load_percpu_segment(cpu); /* We need to access per-cpu area */
- switch_to_new_gdt(cpu); /* GDT and GS set */
-
- /* We are switching of the Xen provided GDT to our HVM mode
- * GDT. The new GDT has __KERNEL_CS with CS.L = 1
- * and we are jumping to reload it.
- */
- asm volatile ("pushq %0\n"
- "leaq 1f(%%rip),%0\n"
- "pushq %0\n"
- "lretq\n"
- "1:\n"
- : "=&r" (dummy) : "0" (__KERNEL_CS));
-
- /*
- * While not needed, we also set the %es, %ds, and %fs
- * to zero. We don't care about %ss as it is NULL.
- * Strictly speaking this is not needed as Xen zeros those
- * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
- *
- * Linux zeros them in cpu_init() and in secondary_startup_64
- * (for BSP).
- */
- loadsegment(es, 0);
- loadsegment(ds, 0);
- loadsegment(fs, 0);
-#else
- /* PVH: TODO Implement. */
- BUG();
-#endif
- return; /* PVH does not need any PV GDT ops. */
- }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
@@ -1466,59 +1442,6 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
-#ifdef CONFIG_XEN_PVH
-/*
- * A PV guest starts with default flags that are not set for PVH, set them
- * here asap.
- */
-static void xen_pvh_set_cr_flags(int cpu)
-{
-
- /* Some of these are setup in 'secondary_startup_64'. The others:
- * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
- * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
- write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
-
- if (!cpu)
- return;
- /*
- * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
- * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
- */
- if (boot_cpu_has(X86_FEATURE_PSE))
- cr4_set_bits_and_update_boot(X86_CR4_PSE);
-
- if (boot_cpu_has(X86_FEATURE_PGE))
- cr4_set_bits_and_update_boot(X86_CR4_PGE);
-}
-
-/*
- * Note, that it is ref - because the only caller of this after init
- * is PVH which is not going to use xen_load_gdt_boot or other
- * __init functions.
- */
-void __ref xen_pvh_secondary_vcpu_init(int cpu)
-{
- xen_setup_gdt(cpu);
- xen_pvh_set_cr_flags(cpu);
-}
-
-static void __init xen_pvh_early_guest_init(void)
-{
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
- BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
-
- xen_pvh_early_cpu_init(0, false);
- xen_pvh_set_cr_flags(0);
-
-#ifdef CONFIG_X86_32
- BUG(); /* PVH: Implement proper support. */
-#endif
-}
-#endif /* CONFIG_XEN_PVH */
-
static void __init xen_dom0_set_legacy_features(void)
{
x86_platform.legacy.rtc = 1;
@@ -1555,24 +1478,17 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features();
-#ifdef CONFIG_XEN_PVH
- xen_pvh_early_guest_init();
-#endif
+
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- if (!xen_pvh_domain()) {
- pv_cpu_ops = xen_cpu_ops;
+ pv_cpu_ops = xen_cpu_ops;
- x86_platform.get_nmi_reason = xen_get_nmi_reason;
- }
+ x86_platform.get_nmi_reason = xen_get_nmi_reason;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
- else
- x86_init.resources.memory_setup = xen_memory_setup;
+ x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
@@ -1665,18 +1581,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* set the limit of our address space */
xen_reserve_top();
- /* PVH: runs at default kernel iopl of 0 */
- if (!xen_pvh_domain()) {
- /*
- * We used to do this in xen_arch_setup, but that is too late
- * on AMD were early_cpu_init (run before ->arch_setup()) calls
- * early_amd_init which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
- }
+ /*
+ * We used to do this in xen_arch_setup, but that is too late
+ * on AMD were early_cpu_init (run before ->arch_setup()) calls
+ * early_amd_init which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
@@ -1758,6 +1671,102 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
}
+#ifdef CONFIG_XEN_PVH
+
+static void xen_pvh_arch_setup(void)
+{
+#ifdef CONFIG_ACPI
+ /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
+ if (nr_ioapics == 0)
+ acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+#endif
+}
+
+static void __init init_pvh_bootparams(void)
+{
+ struct xen_memory_map memmap;
+ unsigned int i;
+ int rc;
+
+ memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+
+ memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
+ set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if (rc) {
+ xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+ BUG();
+ }
+
+ if (memmap.nr_entries < E820MAX - 1) {
+ pvh_bootparams.e820_map[memmap.nr_entries].addr =
+ ISA_START_ADDRESS;
+ pvh_bootparams.e820_map[memmap.nr_entries].size =
+ ISA_END_ADDRESS - ISA_START_ADDRESS;
+ pvh_bootparams.e820_map[memmap.nr_entries].type =
+ E820_RESERVED;
+ memmap.nr_entries++;
+ } else
+ xen_raw_printk("Warning: Can fit ISA range into e820\n");
+
+ sanitize_e820_map(pvh_bootparams.e820_map,
+ ARRAY_SIZE(pvh_bootparams.e820_map),
+ &memmap.nr_entries);
+
+ pvh_bootparams.e820_entries = memmap.nr_entries;
+ for (i = 0; i < pvh_bootparams.e820_entries; i++)
+ e820_add_region(pvh_bootparams.e820_map[i].addr,
+ pvh_bootparams.e820_map[i].size,
+ pvh_bootparams.e820_map[i].type);
+
+ pvh_bootparams.hdr.cmd_line_ptr =
+ pvh_start_info.cmdline_paddr;
+
+ /* The first module is always ramdisk. */
+ if (pvh_start_info.nr_modules) {
+ struct hvm_modlist_entry *modaddr =
+ __va(pvh_start_info.modlist_paddr);
+ pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
+ pvh_bootparams.hdr.ramdisk_size = modaddr->size;
+ }
+
+ /*
+ * See Documentation/x86/boot.txt.
+ *
+ * Version 2.12 supports Xen entry point but we will use default x86/PC
+ * environment (i.e. hardware_subarch 0).
+ */
+ pvh_bootparams.hdr.version = 0x212;
+ pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+}
+
+/*
+ * This routine (and those that it might call) should not use
+ * anything that lives in .bss since that segment will be cleared later.
+ */
+void __init xen_prepare_pvh(void)
+{
+ u32 msr;
+ u64 pfn;
+
+ if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
+ xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
+ pvh_start_info.magic);
+ BUG();
+ }
+
+ xen_pvh = 1;
+
+ msr = cpuid_ebx(xen_cpuid_base() + 2);
+ pfn = __pa(hypercall_page);
+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+ init_pvh_bootparams();
+
+ x86_init.oem.arch_setup = xen_pvh_arch_setup;
+}
+#endif
+
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
@@ -1797,20 +1806,29 @@ void __ref xen_hvm_init_shared_info(void)
static void __init init_hvm_pv_info(void)
{
int major, minor;
- uint32_t eax, ebx, ecx, edx, pages, msr, base;
- u64 pfn;
+ uint32_t eax, ebx, ecx, edx, base;
base = xen_cpuid_base();
- cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+ eax = cpuid_eax(base + 1);
major = eax >> 16;
minor = eax & 0xffff;
printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
- cpuid(base + 2, &pages, &msr, &ecx, &edx);
+ xen_domain_type = XEN_HVM_DOMAIN;
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+ /* PVH set up hypercall page in xen_prepare_pvh(). */
+ if (xen_pvh_domain())
+ pv_info.name = "Xen PVH";
+ else {
+ u64 pfn;
+ uint32_t msr;
+
+ pv_info.name = "Xen HVM";
+ msr = cpuid_ebx(base + 2);
+ pfn = __pa(hypercall_page);
+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+ }
xen_setup_features();
@@ -1819,10 +1837,6 @@ static void __init init_hvm_pv_info(void)
this_cpu_write(xen_vcpu_id, ebx);
else
this_cpu_write(xen_vcpu_id, smp_processor_id());
-
- pv_info.name = "Xen HVM";
-
- xen_domain_type = XEN_HVM_DOMAIN;
}
#endif
@@ -1910,6 +1924,9 @@ static void __init xen_hvm_guest_init(void)
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
+
+ if (xen_pvh_domain())
+ machine_ops.emergency_restart = xen_emergency_restart;
#ifdef CONFIG_KEXEC_CORE
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7d5afdb417cc..f6740b5b1738 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1792,10 +1792,6 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
- /* For PVH no need to set R/O or R/W to pin them or unpin them. */
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
@@ -1902,8 +1898,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* level2_ident_pgt, and level2_kernel_pgt. This means that only the
* kernel has a physical mapping to start with - but that's enough to
* get __va working. We need to fill in the rest of the physical
- * mapping once some sort of allocator has been set up. NOTE: for
- * PVH, the page tables are native.
+ * mapping once some sort of allocator has been set up.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
@@ -2812,16 +2807,6 @@ static int do_remap_gfn(struct vm_area_struct *vma,
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_XEN_PVH
- /* We need to update the local page tables and the xen HAP */
- return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
- prot, domid, pages);
-#else
- return -EINVAL;
-#endif
- }
-
rmd.mfn = gfn;
rmd.prot = prot;
/* We use the err_ptr to indicate if there we are doing a contiguous
@@ -2915,10 +2900,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0;
-#ifdef CONFIG_XEN_PVH
- return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
-#else
return -EINVAL;
-#endif
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 90d1b83cf35f..33a783c77d96 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -73,8 +73,8 @@ bool xen_has_pv_devices(void)
if (!xen_domain())
return false;
- /* PV domains always have them. */
- if (xen_pv_domain())
+ /* PV and PVH domains always have them. */
+ if (xen_pv_domain() || xen_pvh_domain())
return true;
/* And user has xen_platform_pci=0 set in guest config as
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f3f7b41116f7..a8c306cf8868 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -915,39 +915,6 @@ char * __init xen_memory_setup(void)
}
/*
- * Machine specific memory setup for auto-translated guests.
- */
-char * __init xen_auto_xlated_memory_setup(void)
-{
- struct xen_memory_map memmap;
- int i;
- int rc;
-
- memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
- set_xen_guest_handle(memmap.buffer, xen_e820_map);
-
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
- if (rc < 0)
- panic("No memory map (%d)\n", rc);
-
- xen_e820_map_entries = memmap.nr_entries;
-
- sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
- &xen_e820_map_entries);
-
- for (i = 0; i < xen_e820_map_entries; i++)
- e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size,
- xen_e820_map[i].type);
-
- /* Remove p2m info, it is not needed. */
- xen_start_info->mfn_list = 0;
- xen_start_info->first_p2m_pfn = 0;
- xen_start_info->nr_p2m_frames = 0;
-
- return "Xen";
-}
-
-/*
* Set the bit indicating "nosegneg" library variants should be used.
* We only need to bother in pure 32-bit mode; compat 32-bit processes
* can have un-truncated segments, so wrapping around is allowed.
@@ -1032,8 +999,8 @@ void __init xen_pvmmu_arch_setup(void)
void __init xen_arch_setup(void)
{
xen_panic_handler_init();
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- xen_pvmmu_arch_setup();
+
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..0dee6f59ea82 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -99,18 +99,8 @@ static void cpu_bringup(void)
local_irq_enable();
}
-/*
- * Note: cpu parameter is only relevant for PVH. The reason for passing it
- * is we can't do smp_processor_id until the percpu segments are loaded, for
- * which we need the cpu number! So we pass it in rdi as first parameter.
- */
-asmlinkage __visible void cpu_bringup_and_idle(int cpu)
+asmlinkage __visible void cpu_bringup_and_idle(void)
{
-#ifdef CONFIG_XEN_PVH
- if (xen_feature(XENFEAT_auto_translated_physmap) &&
- xen_feature(XENFEAT_supervisor_mode_kernel))
- xen_pvh_secondary_vcpu_init(cpu);
-#endif
cpu_bringup();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
@@ -404,61 +394,47 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);
#ifdef CONFIG_X86_32
- /* Note: PVH is not yet supported on x86_32. */
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
- ctxt->user_regs.ss = __KERNEL_DS;
+ ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+ ctxt->flags = VGCF_IN_KERNEL;
+ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+ ctxt->user_regs.ds = __USER_DS;
+ ctxt->user_regs.es = __USER_DS;
+ ctxt->user_regs.ss = __KERNEL_DS;
- xen_copy_trap_info(ctxt->trap_ctxt);
+ xen_copy_trap_info(ctxt->trap_ctxt);
- ctxt->ldt_ents = 0;
+ ctxt->ldt_ents = 0;
- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
+ make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ ctxt->gdt_frames[0] = gdt_mfn;
+ ctxt->gdt_ents = GDT_ENTRIES;
- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_ss = __KERNEL_DS;
+ ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
+ ctxt->event_callback_cs = __KERNEL_CS;
+ ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
- ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
- ctxt->event_callback_eip =
- (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip =
- (unsigned long)xen_failsafe_callback;
- ctxt->user_regs.cs = __KERNEL_CS;
- per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- }
-#ifdef CONFIG_XEN_PVH
- else {
- /*
- * The vcpu comes on kernel page tables which have the NX pte
- * bit set. This means before DS/SS is touched, NX in
- * EFER must be set. Hence the following assembly glue code.
- */
- ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
- ctxt->user_regs.rdi = cpu;
- ctxt->user_regs.rsi = true; /* entry == true */
- }
+ ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
+ ctxt->event_callback_eip =
+ (unsigned long)xen_hypervisor_callback;
+ ctxt->failsafe_callback_eip =
+ (unsigned long)xen_failsafe_callback;
+ ctxt->user_regs.cs = __KERNEL_CS;
+ per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
+
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c5c16dc4f694..9beef333584a 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -21,12 +21,4 @@ static inline int xen_smp_intr_init(unsigned int cpu)
static inline void xen_smp_intr_free(unsigned int cpu) {}
#endif /* CONFIG_SMP */
-#ifdef CONFIG_XEN_PVH
-extern void xen_pvh_early_cpu_init(int cpu, bool entry);
-#else
-static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
-{
-}
-#endif
-
#endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7f8d8abf4c1a..37794e42b67d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -16,25 +16,6 @@
#include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h>
-#ifdef CONFIG_XEN_PVH
-#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
-/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
- * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
- * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
- */
-#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
- (1 << XENFEAT_auto_translated_physmap) | \
- (1 << XENFEAT_supervisor_mode_kernel) | \
- (1 << XENFEAT_hvm_callback_vector))
-/* The XENFEAT_writable_page_tables is not stricly necessary as we set that
- * up regardless whether this CONFIG option is enabled or not, but it
- * clarifies what the right flags need to be.
- */
-#else
-#define PVH_FEATURES_STR ""
-#define PVH_FEATURES (0)
-#endif
-
__INIT
ENTRY(startup_xen)
cld
@@ -54,41 +35,6 @@ ENTRY(startup_xen)
__FINIT
-#ifdef CONFIG_XEN_PVH
-/*
- * xen_pvh_early_cpu_init() - early PVH VCPU initialization
- * @cpu: this cpu number (%rdi)
- * @entry: true if this is a secondary vcpu coming up on this entry
- * point, false if this is the boot CPU being initialized for
- * the first time (%rsi)
- *
- * Note: This is called as a function on the boot CPU, and is the entry point
- * on the secondary CPU.
- */
-ENTRY(xen_pvh_early_cpu_init)
- mov %rsi, %r11
-
- /* Gather features to see if NX implemented. */
- mov $0x80000001, %eax
- cpuid
- mov %edx, %esi
-
- mov $MSR_EFER, %ecx
- rdmsr
- bts $_EFER_SCE, %eax
-
- bt $20, %esi
- jnc 1f /* No NX, skip setting it */
- bts $_EFER_NX, %eax
-1: wrmsr
-#ifdef CONFIG_SMP
- cmp $0, %r11b
- jne cpu_bringup_and_idle
-#endif
- ret
-
-#endif /* CONFIG_XEN_PVH */
-
.pushsection .text
.balign PAGE_SIZE
ENTRY(hypercall_page)
@@ -114,10 +60,10 @@ ENTRY(hypercall_page)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
- ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
- (1 << XENFEAT_writable_page_tables) |
- (1 << XENFEAT_dom0))
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
+ .ascii "!writable_page_tables|pae_pgdir_above_4gb")
+ ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
+ .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index ac0a2b0f9e62..f6a41c41ebc7 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -146,5 +146,4 @@ __visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void);
-void xen_pvh_secondary_vcpu_init(int cpu);
#endif /* XEN_OPS_H */
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
new file mode 100644
index 000000000000..5e246716d58f
--- /dev/null
+++ b/arch/x86/xen/xen-pvh.S
@@ -0,0 +1,161 @@
+/*
+ * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ .code32
+ .text
+#define _pa(x) ((x) - __START_KERNEL_map)
+
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/asm.h>
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <xen/interface/elfnote.h>
+
+ __HEAD
+
+/*
+ * Entry point for PVH guests.
+ *
+ * Xen ABI specifies the following register state when we come here:
+ *
+ * - `ebx`: contains the physical memory address where the loader has placed
+ * the boot start info structure.
+ * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
+ * - `cr4`: all bits are cleared.
+ * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
+ * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
+ * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
+ * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all
+ * unspecified.
+ * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
+ * of '0x67'.
+ * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
+ * Bit 8 (TF) must be cleared. Other bits are all unspecified.
+ *
+ * All other processor registers and flag bits are unspecified. The OS is in
+ * charge of setting up it's own stack, GDT and IDT.
+ */
+
+ENTRY(pvh_start_xen)
+ cld
+
+ lgdt (_pa(gdt))
+
+ mov $(__BOOT_DS),%eax
+ mov %eax,%ds
+ mov %eax,%es
+ mov %eax,%ss
+
+ /* Stash hvm_start_info. */
+ mov $_pa(pvh_start_info), %edi
+ mov %ebx, %esi
+ mov _pa(pvh_start_info_sz), %ecx
+ shr $2,%ecx
+ rep
+ movsl
+
+ mov $_pa(early_stack_end), %esp
+
+ /* Enable PAE mode. */
+ mov %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ mov %eax, %cr4
+
+#ifdef CONFIG_X86_64
+ /* Enable Long mode. */
+ mov $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* Enable pre-constructed page tables. */
+ mov $_pa(init_level4_pgt), %eax
+ mov %eax, %cr3
+ mov $(X86_CR0_PG | X86_CR0_PE), %eax
+ mov %eax, %cr0
+
+ /* Jump to 64-bit mode. */
+ ljmp $__KERNEL_CS, $_pa(1f)
+
+ /* 64-bit entry point. */
+ .code64
+1:
+ call xen_prepare_pvh
+
+ /* startup_64 expects boot_params in %rsi. */
+ mov $_pa(pvh_bootparams), %rsi
+ mov $_pa(startup_64), %rax
+ jmp *%rax
+
+#else /* CONFIG_X86_64 */
+
+ call mk_early_pgtbl_32
+
+ mov $_pa(initial_page_table), %eax
+ mov %eax, %cr3
+
+ mov %cr0, %eax
+ or $(X86_CR0_PG | X86_CR0_PE), %eax
+ mov %eax, %cr0
+
+ ljmp $__BOOT_CS, $1f
+1:
+ call xen_prepare_pvh
+ mov $_pa(pvh_bootparams), %esi
+
+ /* startup_32 doesn't expect paging and PAE to be on. */
+ ljmp $__BOOT_CS, $_pa(2f)
+2:
+ mov %cr0, %eax
+ and $~X86_CR0_PG, %eax
+ mov %eax, %cr0
+ mov %cr4, %eax
+ and $~X86_CR4_PAE, %eax
+ mov %eax, %cr4
+
+ ljmp $__BOOT_CS, $_pa(startup_32)
+#endif
+END(pvh_start_xen)
+
+ .section ".init.data","aw"
+ .balign 8
+gdt:
+ .word gdt_end - gdt_start
+ .long _pa(gdt_start)
+ .word 0
+gdt_start:
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* reserved */
+#ifdef CONFIG_X86_64
+ .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */
+#else
+ .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */
+#endif
+ .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */
+gdt_end:
+
+ .balign 4
+early_stack:
+ .fill 256, 1, 0
+early_stack_end:
+
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
+ _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 47b481095d77..f9bcf4a665bc 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -67,6 +67,7 @@ module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);
+#define MAX_QUEUES_DEFAULT 8
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
@@ -1622,11 +1623,12 @@ static int __init netback_init(void)
if (!xen_domain())
return -ENODEV;
- /* Allow as many queues as there are CPUs if user has not
+ /* Allow as many queues as there are CPUs but max. 8 if user has not
* specified a value.
*/
if (xenvif_max_queues == 0)
- xenvif_max_queues = num_online_cpus();
+ xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
+ num_online_cpus());
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cf82b5b42056..1c5c28b97efe 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -57,6 +57,7 @@
#include <xen/interface/grant_table.h>
/* Module parameters */
+#define MAX_QUEUES_DEFAULT 8
static unsigned int xennet_max_queues;
module_param_named(max_queues, xennet_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
@@ -2162,11 +2163,12 @@ static int __init netif_init(void)
pr_info("Initialising Xen virtual ethernet driver\n");
- /* Allow as many queues as there are CPUs if user has not
+ /* Allow as many queues as there are CPUs inut max. 8 if user has not
* specified a value.
*/
if (xennet_max_queues == 0)
- xennet_max_queues = num_online_cpus();
+ xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
+ num_online_cpus());
return xenbus_register_frontend(&netfront_driver);
}
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 5676aefdf2bc..0bab60a37464 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -107,7 +107,7 @@ static int __init setup_vcpu_hotplug_event(void)
.notifier_call = setup_cpu_watcher };
#ifdef CONFIG_X86
- if (!xen_pv_domain())
+ if (!xen_pv_domain() && !xen_pvh_domain())
#else
if (!xen_domain())
#endif
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index fd8e872d2943..6a53577772c9 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1704,7 +1704,6 @@ void __init xen_init_IRQ(void)
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
- /* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bb36b1e1dbcc..d6786b87e13b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1146,13 +1146,13 @@ EXPORT_SYMBOL_GPL(gnttab_init);
static int __gnttab_init(void)
{
+ if (!xen_domain())
+ return -ENODEV;
+
/* Delay grant-table initialization in the PV on HVM case */
- if (xen_hvm_domain())
+ if (xen_hvm_domain() && !xen_pvh_domain())
return 0;
- if (!xen_pv_domain())
- return -ENODEV;
-
return gnttab_init();
}
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 26e5e8507f03..357a8db859c9 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -277,7 +277,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
err = xenbus_transaction_start(&xbt);
if (err)
return;
- if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+ if (xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key) < 0) {
pr_err("Unable to read sysrq code in control/sysrq\n");
xenbus_transaction_end(xbt, 1);
return;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 056da6ee1a35..915d77785193 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -259,53 +259,34 @@ int xenbus_frontend_closed(struct xenbus_device *dev)
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
-/**
- * Return the path to the error node for the given device, or NULL on failure.
- * If the value returned is non-NULL, then it is the caller's to kfree.
- */
-static char *error_path(struct xenbus_device *dev)
-{
- return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
-}
-
-
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap)
{
unsigned int len;
- char *printf_buffer = NULL;
- char *path_buffer = NULL;
+ char *printf_buffer;
+ char *path_buffer;
#define PRINTF_BUFFER_SIZE 4096
+
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
- if (printf_buffer == NULL)
- goto fail;
+ if (!printf_buffer)
+ return;
len = sprintf(printf_buffer, "%i ", -err);
- vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
+ vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
dev_err(&dev->dev, "%s\n", printf_buffer);
- path_buffer = error_path(dev);
-
- if (path_buffer == NULL) {
+ path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
+ if (!path_buffer ||
+ xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer))
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
- dev->nodename, printf_buffer);
- goto fail;
- }
+ dev->nodename, printf_buffer);
- if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
- dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
- dev->nodename, printf_buffer);
- goto fail;
- }
-
-fail:
kfree(printf_buffer);
kfree(path_buffer);
}
-
/**
* xenbus_dev_error
* @dev: xenbus device
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index f90b03454659..9e9f9bf7c66d 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -193,9 +193,19 @@
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
/*
+ * Physical entry point into the kernel.
+ *
+ * 32bit entry point into the kernel. When requested to launch the
+ * guest kernel in a HVM container, Xen will use this entry point to
+ * launch the guest in 32bit protected mode with paging disabled.
+ * Ignored otherwise.
+ */
+#define XEN_ELFNOTE_PHYS32_ENTRY 18
+
+/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
diff --git a/include/xen/interface/hvm/hvm_vcpu.h b/include/xen/interface/hvm/hvm_vcpu.h
new file mode 100644
index 000000000000..32ca83edd44d
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_vcpu.h
@@ -0,0 +1,143 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2015, Roger Pau Monne <roger.pau@citrix.com>
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__
+#define __XEN_PUBLIC_HVM_HVM_VCPU_H__
+
+#include "../xen.h"
+
+struct vcpu_hvm_x86_32 {
+ uint32_t eax;
+ uint32_t ecx;
+ uint32_t edx;
+ uint32_t ebx;
+ uint32_t esp;
+ uint32_t ebp;
+ uint32_t esi;
+ uint32_t edi;
+ uint32_t eip;
+ uint32_t eflags;
+
+ uint32_t cr0;
+ uint32_t cr3;
+ uint32_t cr4;
+
+ uint32_t pad1;
+
+ /*
+ * EFER should only be used to set the NXE bit (if required)
+ * when starting a vCPU in 32bit mode with paging enabled or
+ * to set the LME/LMA bits in order to start the vCPU in
+ * compatibility mode.
+ */
+ uint64_t efer;
+
+ uint32_t cs_base;
+ uint32_t ds_base;
+ uint32_t ss_base;
+ uint32_t es_base;
+ uint32_t tr_base;
+ uint32_t cs_limit;
+ uint32_t ds_limit;
+ uint32_t ss_limit;
+ uint32_t es_limit;
+ uint32_t tr_limit;
+ uint16_t cs_ar;
+ uint16_t ds_ar;
+ uint16_t ss_ar;
+ uint16_t es_ar;
+ uint16_t tr_ar;
+
+ uint16_t pad2[3];
+};
+
+/*
+ * The layout of the _ar fields of the segment registers is the
+ * following:
+ *
+ * Bits [0,3]: type (bits 40-43).
+ * Bit 4: s (descriptor type, bit 44).
+ * Bit [5,6]: dpl (descriptor privilege level, bits 45-46).
+ * Bit 7: p (segment-present, bit 47).
+ * Bit 8: avl (available for system software, bit 52).
+ * Bit 9: l (64-bit code segment, bit 53).
+ * Bit 10: db (meaning depends on the segment, bit 54).
+ * Bit 11: g (granularity, bit 55)
+ * Bits [12,15]: unused, must be blank.
+ *
+ * A more complete description of the meaning of this fields can be
+ * obtained from the Intel SDM, Volume 3, section 3.4.5.
+ */
+
+struct vcpu_hvm_x86_64 {
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t rip;
+ uint64_t rflags;
+
+ uint64_t cr0;
+ uint64_t cr3;
+ uint64_t cr4;
+ uint64_t efer;
+
+ /*
+ * Using VCPU_HVM_MODE_64B implies that the vCPU is launched
+ * directly in long mode, so the cached parts of the segment
+ * registers get set to match that environment.
+ *
+ * If the user wants to launch the vCPU in compatibility mode
+ * the 32-bit structure should be used instead.
+ */
+};
+
+struct vcpu_hvm_context {
+#define VCPU_HVM_MODE_32B 0 /* 32bit fields of the structure will be used. */
+#define VCPU_HVM_MODE_64B 1 /* 64bit fields of the structure will be used. */
+ uint32_t mode;
+
+ uint32_t pad;
+
+ /* CPU registers. */
+ union {
+ struct vcpu_hvm_x86_32 x86_32;
+ struct vcpu_hvm_x86_64 x86_64;
+ } cpu_regs;
+};
+typedef struct vcpu_hvm_context vcpu_hvm_context_t;
+
+#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/hvm/start_info.h b/include/xen/interface/hvm/start_info.h
new file mode 100644
index 000000000000..648415976ead
--- /dev/null
+++ b/include/xen/interface/hvm/start_info.h
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2016, Citrix Systems, Inc.
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__
+#define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__
+
+/*
+ * Start of day structure passed to PVH guests and to HVM guests in %ebx.
+ *
+ * NOTE: nothing will be loaded at physical address 0, so a 0 value in any
+ * of the address fields should be treated as not present.
+ *
+ * 0 +----------------+
+ * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE
+ * | | ("xEn3" with the 0x80 bit of the "E" set).
+ * 4 +----------------+
+ * | version | Version of this structure. Current version is 0. New
+ * | | versions are guaranteed to be backwards-compatible.
+ * 8 +----------------+
+ * | flags | SIF_xxx flags.
+ * 12 +----------------+
+ * | nr_modules | Number of modules passed to the kernel.
+ * 16 +----------------+
+ * | modlist_paddr | Physical address of an array of modules
+ * | | (layout of the structure below).
+ * 24 +----------------+
+ * | cmdline_paddr | Physical address of the command line,
+ * | | a zero-terminated ASCII string.
+ * 32 +----------------+
+ * | rsdp_paddr | Physical address of the RSDP ACPI data structure.
+ * 40 +----------------+
+ *
+ * The layout of each entry in the module structure is the following:
+ *
+ * 0 +----------------+
+ * | paddr | Physical address of the module.
+ * 8 +----------------+
+ * | size | Size of the module in bytes.
+ * 16 +----------------+
+ * | cmdline_paddr | Physical address of the command line,
+ * | | a zero-terminated ASCII string.
+ * 24 +----------------+
+ * | reserved |
+ * 32 +----------------+
+ *
+ * The address and sizes are always a 64bit little endian unsigned integer.
+ *
+ * NB: Xen on x86 will always try to place all the data below the 4GiB
+ * boundary.
+ */
+#define XEN_HVM_START_MAGIC_VALUE 0x336ec578
+
+/*
+ * C representation of the x86/HVM start info layout.
+ *
+ * The canonical definition of this layout is above, this is just a way to
+ * represent the layout described there using C types.
+ */
+struct hvm_start_info {
+ uint32_t magic; /* Contains the magic value 0x336ec578 */
+ /* ("xEn3" with the 0x80 bit of the "E" set).*/
+ uint32_t version; /* Version of this structure. */
+ uint32_t flags; /* SIF_xxx flags. */
+ uint32_t nr_modules; /* Number of modules passed to the kernel. */
+ uint64_t modlist_paddr; /* Physical address of an array of */
+ /* hvm_modlist_entry. */
+ uint64_t cmdline_paddr; /* Physical address of the command line. */
+ uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */
+ /* structure. */
+};
+
+struct hvm_modlist_entry {
+ uint64_t paddr; /* Physical address of the module. */
+ uint64_t size; /* Size of the module in bytes. */
+ uint64_t cmdline_paddr; /* Physical address of the command line. */
+ uint64_t reserved;
+};
+
+#endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */
diff --git a/include/xen/xen.h b/include/xen/xen.h
index f0f0252cff9a..6e8b7fc79801 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -30,16 +30,10 @@ extern enum xen_domain_type xen_domain_type;
#endif /* CONFIG_XEN_DOM0 */
#ifdef CONFIG_XEN_PVH
-/* This functionality exists only for x86. The XEN_PVHVM support exists
- * only in x86 world - hence on ARM it will be always disabled.
- * N.B. ARM guests are neither PV nor HVM nor PVHVM.
- * It's a bit like PVH but is different also (it's further towards the H
- * end of the spectrum than even PVH).
- */
-#include <xen/features.h>
-#define xen_pvh_domain() (xen_pv_domain() && \
- xen_feature(XENFEAT_auto_translated_physmap))
+extern bool xen_pvh;
+#define xen_pvh_domain() (xen_hvm_domain() && xen_pvh)
#else
#define xen_pvh_domain() (0)
#endif
+
#endif /* _XEN_XEN_H */