diff options
Diffstat (limited to 'include/linux')
35 files changed, 1043 insertions, 259 deletions
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 27b9350052b..85b2482cc73 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -100,7 +100,6 @@ header-y += iso_fs.h header-y += ixjuser.h header-y += jffs2.h header-y += keyctl.h -header-y += kvm.h header-y += limits.h header-y += lock_dlm_plock.h header-y += magic.h @@ -256,6 +255,7 @@ unifdef-y += kd.h unifdef-y += kernelcapi.h unifdef-y += kernel.h unifdef-y += keyboard.h +unifdef-$(CONFIG_HAVE_KVM) += kvm.h unifdef-y += llc.h unifdef-y += loop.h unifdef-y += lp.h diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 1d0ef1ae803..7e3d2859be5 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -25,6 +25,8 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +extern void pmtimer_wait(unsigned); + #else static inline u32 acpi_pm_read_early(void) diff --git a/include/linux/audit.h b/include/linux/audit.h index c6878169283..bdd6f5de5fc 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -115,6 +115,8 @@ #define AUDIT_MAC_IPSEC_ADDSPD 1413 /* Not used */ #define AUDIT_MAC_IPSEC_DELSPD 1414 /* Not used */ #define AUDIT_MAC_IPSEC_EVENT 1415 /* Audit an IPSec event */ +#define AUDIT_MAC_UNLBL_STCADD 1416 /* NetLabel: add a static label */ +#define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 107787aacb6..85778a4b120 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -103,7 +103,7 @@ struct clocksource { #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 /* simplify initialization of mask field */ -#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1) +#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) /** * clocksource_khz2mult - calculates mult from khz and shift @@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* used to install a new clocksource */ extern int clocksource_register(struct clocksource*); +extern void clocksource_unregister(struct clocksource*); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); diff --git a/include/linux/compat.h b/include/linux/compat.h index 0e69d2cf14a..d38655f2be7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -191,6 +191,10 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp); +asmlinkage long compat_sys_wait4(compat_pid_t pid, + compat_uint_t *stat_addr, int options, + struct compat_rusage *ru); + #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) \ @@ -239,6 +243,17 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, const compat_ulong_t __user *new_nodes); +extern int compat_ptrace_request(struct task_struct *child, + compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); + +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE +extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data); +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ + /* * epoll (fs/eventpoll.c) compat bits follow ... */ diff --git a/include/linux/const.h b/include/linux/const.h index 07b300bfe34..c22c707c455 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -7,13 +7,18 @@ * C code. 
Therefore we cannot annotate them always with * 'UL' and other type specifiers unilaterally. We * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. */ #ifdef __ASSEMBLY__ #define _AC(X,Y) X +#define _AT(T,X) X #else #define __AC(X,Y) (X##Y) #define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) #endif #endif /* !(_LINUX_CONST_H) */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 85bd790c201..7047f58306a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -218,8 +218,8 @@ int __first_cpu(const cpumask_t *srcp); int __next_cpu(int n, const cpumask_t *srcp); #define next_cpu(n, src) __next_cpu((n), &(src)) #else -#define first_cpu(src) 0 -#define next_cpu(n, src) 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) #endif #define cpumask_of_cpu(cpu) \ diff --git a/include/linux/device.h b/include/linux/device.h index 1880208964d..db375be333c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -84,6 +84,9 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, struct device *bus_find_device(struct bus_type *bus, struct device *start, void *data, int (*match)(struct device *dev, void *data)); +struct device *bus_find_device_by_name(struct bus_type *bus, + struct device *start, + const char *name); int __must_check bus_for_each_drv(struct bus_type *bus, struct device_driver *start, void *data, diff --git a/include/linux/elf.h b/include/linux/elf.h index 576e83bd6d8..7ceb24d87c1 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -355,6 +355,7 @@ typedef struct elf64_shdr { #define NT_AUXV 6 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ /* Note header in a PT_NOTE section */ diff --git 
a/include/linux/hpet.h b/include/linux/hpet.h index 707f7cb9e79..9cd94bfd07e 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -64,7 +64,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) @@ -115,9 +115,6 @@ static inline void hpet_reserve_timer(struct hpet_data *hd, int timer) } int hpet_alloc(struct hpet_data *); -int hpet_register(struct hpet_task *, int); -int hpet_unregister(struct hpet_task *); -int hpet_control(struct hpet_task *, unsigned int, unsigned long); #endif /* __KERNEL__ */ diff --git a/include/linux/init_ohci1394_dma.h b/include/linux/init_ohci1394_dma.h new file mode 100644 index 00000000000..3c03a4bba5e --- /dev/null +++ b/include/linux/init_ohci1394_dma.h @@ -0,0 +1,4 @@ +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +extern int __initdata init_ohci1394_dma_early; +extern void __init init_ohci1394_dma_on_all_controllers(void); +#endif diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6187a8567bc..605d237364d 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -8,6 +8,7 @@ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H +#ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/types.h> /* @@ -153,4 +154,5 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a7283c9bead..ff356b2ee47 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -194,6 +194,9 @@ static inline int log_buf_read(int idx) { return 0; } static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } #endif +extern void 
__attribute__((format(printf, 1, 2))) + early_printk(const char *fmt, ...); + unsigned long int_sqrt(unsigned long); extern int printk_ratelimit(void); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 81891581e89..6168c0a4417 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,15 @@ static inline void kretprobe_assert(struct kretprobe_instance *ri, } } +#ifdef CONFIG_KPROBES_SANITY_TEST +extern int init_test_probes(void); +#else +static inline int init_test_probes(void) +{ + return 0; +} +#endif /* CONFIG_KPROBES_SANITY_TEST */ + extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); @@ -227,6 +236,7 @@ void unregister_kretprobe(struct kretprobe *rp); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); + #else /* CONFIG_KPROBES */ #define __kprobes /**/ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 057a7f34ee3..4de4fd2d860 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -9,12 +9,10 @@ #include <asm/types.h> #include <linux/ioctl.h> +#include <asm/kvm.h> #define KVM_API_VERSION 12 -/* Architectural interrupt line count. 
*/ -#define KVM_NR_INTERRUPTS 256 - /* for KVM_CREATE_MEMORY_REGION */ struct kvm_memory_region { __u32 slot; @@ -23,17 +21,19 @@ struct kvm_memory_region { __u64 memory_size; /* bytes */ }; -/* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL - -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ +/* for KVM_SET_USER_MEMORY_REGION */ +struct kvm_userspace_memory_region { + __u32 slot; __u32 flags; __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for kvm_memory_region::flags */ +#define KVM_MEM_LOG_DIRTY_PAGES 1UL + + /* for KVM_IRQ_LINE */ struct kvm_irq_level { /* @@ -45,62 +45,18 @@ struct kvm_irq_level { __u32 level; }; -/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ -struct kvm_pic_state { - __u8 last_irr; /* edge detection */ - __u8 irr; /* interrupt request register */ - __u8 imr; /* interrupt mask register */ - __u8 isr; /* interrupt service register */ - __u8 priority_add; /* highest irq priority */ - __u8 irq_base; - __u8 read_reg_select; - __u8 poll; - __u8 special_mask; - __u8 init_state; - __u8 auto_eoi; - __u8 rotate_on_auto_eoi; - __u8 special_fully_nested_mode; - __u8 init4; /* true if 4 byte init */ - __u8 elcr; /* PIIX edge/trigger selection */ - __u8 elcr_mask; -}; - -#define KVM_IOAPIC_NUM_PINS 24 -struct kvm_ioapic_state { - __u64 base_address; - __u32 ioregsel; - __u32 id; - __u32 irr; - __u32 pad; - union { - __u64 bits; - struct { - __u8 vector; - __u8 delivery_mode:3; - __u8 dest_mode:1; - __u8 delivery_status:1; - __u8 polarity:1; - __u8 remote_irr:1; - __u8 trig_mode:1; - __u8 mask:1; - __u8 reserve:7; - __u8 reserved[4]; - __u8 dest_id; - } fields; - } redirtbl[KVM_IOAPIC_NUM_PINS]; -}; - -#define KVM_IRQCHIP_PIC_MASTER 0 -#define KVM_IRQCHIP_PIC_SLAVE 1 -#define KVM_IRQCHIP_IOAPIC 2 struct kvm_irqchip { __u32 chip_id; __u32 pad; union { char 
dummy[512]; /* reserving space */ +#ifdef CONFIG_X86 struct kvm_pic_state pic; +#endif +#if defined(CONFIG_X86) || defined(CONFIG_IA64) struct kvm_ioapic_state ioapic; +#endif } chip; }; @@ -116,6 +72,7 @@ struct kvm_irqchip { #define KVM_EXIT_FAIL_ENTRY 9 #define KVM_EXIT_INTR 10 #define KVM_EXIT_SET_TPR 11 +#define KVM_EXIT_TPR_ACCESS 12 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -174,90 +131,17 @@ struct kvm_run { __u32 longmode; __u32 pad; } hypercall; + /* KVM_EXIT_TPR_ACCESS */ + struct { + __u64 rip; + __u32 is_write; + __u32 pad; + } tpr_access; /* Fix the size of the union. */ char padding[256]; }; }; -/* for KVM_GET_REGS and KVM_SET_REGS */ -struct kvm_regs { - /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ - __u64 rax, rbx, rcx, rdx; - __u64 rsi, rdi, rsp, rbp; - __u64 r8, r9, r10, r11; - __u64 r12, r13, r14, r15; - __u64 rip, rflags; -}; - -/* for KVM_GET_FPU and KVM_SET_FPU */ -struct kvm_fpu { - __u8 fpr[8][16]; - __u16 fcw; - __u16 fsw; - __u8 ftwx; /* in fxsave format */ - __u8 pad1; - __u16 last_opcode; - __u64 last_ip; - __u64 last_dp; - __u8 xmm[16][16]; - __u32 mxcsr; - __u32 pad2; -}; - -/* for KVM_GET_LAPIC and KVM_SET_LAPIC */ -#define KVM_APIC_REG_SIZE 0x400 -struct kvm_lapic_state { - char regs[KVM_APIC_REG_SIZE]; -}; - -struct kvm_segment { - __u64 base; - __u32 limit; - __u16 selector; - __u8 type; - __u8 present, dpl, db, s, l, g, avl; - __u8 unusable; - __u8 padding; -}; - -struct kvm_dtable { - __u64 base; - __u16 limit; - __u16 padding[3]; -}; - -/* for KVM_GET_SREGS and KVM_SET_SREGS */ -struct kvm_sregs { - /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ - struct kvm_segment cs, ds, es, fs, gs, ss; - struct kvm_segment tr, ldt; - struct kvm_dtable gdt, idt; - __u64 cr0, cr2, cr3, cr4, cr8; - __u64 efer; - __u64 apic_base; - __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; -}; - -struct kvm_msr_entry { - __u32 index; - __u32 reserved; - __u64 data; -}; - -/* for KVM_GET_MSRS and KVM_SET_MSRS */ 
-struct kvm_msrs { - __u32 nmsrs; /* number of msrs in entries */ - __u32 pad; - - struct kvm_msr_entry entries[0]; -}; - -/* for KVM_GET_MSR_INDEX_LIST */ -struct kvm_msr_list { - __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[0]; -}; - /* for KVM_TRANSLATE */ struct kvm_translation { /* in */ @@ -302,28 +186,24 @@ struct kvm_dirty_log { }; }; -struct kvm_cpuid_entry { - __u32 function; - __u32 eax; - __u32 ebx; - __u32 ecx; - __u32 edx; - __u32 padding; -}; - -/* for KVM_SET_CPUID */ -struct kvm_cpuid { - __u32 nent; - __u32 padding; - struct kvm_cpuid_entry entries[0]; -}; - /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; __u8 sigset[0]; }; +/* for KVM_TPR_ACCESS_REPORTING */ +struct kvm_tpr_access_ctl { + __u32 enabled; + __u32 flags; + __u32 reserved[8]; +}; + +/* for KVM_SET_VAPIC_ADDR */ +struct kvm_vapic_addr { + __u64 vapic_addr; +}; + #define KVMIO 0xAE /* @@ -347,11 +227,21 @@ struct kvm_signal_mask { */ #define KVM_CAP_IRQCHIP 0 #define KVM_CAP_HLT 1 +#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 +#define KVM_CAP_USER_MEMORY 3 +#define KVM_CAP_SET_TSS_ADDR 4 +#define KVM_CAP_EXT_CPUID 5 +#define KVM_CAP_VAPIC 6 /* * ioctls for VM fds */ #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ + struct kvm_userspace_memory_region) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. 
@@ -359,6 +249,7 @@ struct kvm_signal_mask { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) @@ -384,5 +275,11 @@ struct kvm_signal_mask { #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) #define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) #define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) +#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) +#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) +/* Available with KVM_CAP_VAPIC */ +#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) +/* Available with KVM_CAP_VAPIC */ +#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h new file mode 100644 index 00000000000..ea4764b0a2f --- /dev/null +++ b/include/linux/kvm_host.h @@ -0,0 +1,299 @@ +#ifndef __KVM_HOST_H +#define __KVM_HOST_H + +/* + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include <linux/types.h> +#include <linux/hardirq.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/preempt.h> +#include <asm/signal.h> + +#include <linux/kvm.h> +#include <linux/kvm_para.h> + +#include <linux/kvm_types.h> + +#include <asm/kvm_host.h> + +#define KVM_MAX_VCPUS 4 +#define KVM_MEMORY_SLOTS 8 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +#define KVM_PIO_PAGE_OFFSET 1 + +/* + * vcpu->requests bit members + */ +#define KVM_REQ_TLB_FLUSH 0 +#define KVM_REQ_MIGRATE_TIMER 1 +#define KVM_REQ_REPORT_TPR_ACCESS 2 + +struct kvm_vcpu; +extern struct kmem_cache *kvm_vcpu_cache; + +struct kvm_guest_debug { + int enabled; + unsigned long bp[4]; + int singlestep; +}; + +/* + * It would be nice to use something smarter than a linear search, TBD... + * Thankfully we dont expect many devices to register (famous last words :), + * so until then it will suffice. At least its abstracted so we can change + * in one place. 
+ */ +struct kvm_io_bus { + int dev_count; +#define NR_IOBUS_DEVS 6 + struct kvm_io_device *devs[NR_IOBUS_DEVS]; +}; + +void kvm_io_bus_init(struct kvm_io_bus *bus); +void kvm_io_bus_destroy(struct kvm_io_bus *bus); +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr); +void kvm_io_bus_register_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev); + +struct kvm_vcpu { + struct kvm *kvm; + struct preempt_notifier preempt_notifier; + int vcpu_id; + struct mutex mutex; + int cpu; + struct kvm_run *run; + int guest_mode; + unsigned long requests; + struct kvm_guest_debug guest_debug; + int fpu_active; + int guest_fpu_loaded; + wait_queue_head_t wq; + int sigset_active; + sigset_t sigset; + struct kvm_vcpu_stat stat; + +#ifdef CONFIG_HAS_IOMEM + int mmio_needed; + int mmio_read_completed; + int mmio_is_write; + int mmio_size; + unsigned char mmio_data[8]; + gpa_t mmio_phys_addr; +#endif + + struct kvm_vcpu_arch arch; +}; + +struct kvm_memory_slot { + gfn_t base_gfn; + unsigned long npages; + unsigned long flags; + unsigned long *rmap; + unsigned long *dirty_bitmap; + unsigned long userspace_addr; + int user_alloc; +}; + +struct kvm { + struct mutex lock; /* protects the vcpus array and APIC accesses */ + spinlock_t mmu_lock; + struct mm_struct *mm; /* userspace tied to this vm */ + int nmemslots; + struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + + KVM_PRIVATE_MEM_SLOTS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct list_head vm_list; + struct file *filp; + struct kvm_io_bus mmio_bus; + struct kvm_io_bus pio_bus; + struct kvm_vm_stat stat; + struct kvm_arch arch; +}; + +/* The guest did something we don't support. */ +#define pr_unimpl(vcpu, fmt, ...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ + current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ + } while (0) + +#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) +#define vcpu_printf(vcpu, fmt...) 
kvm_printf(vcpu->kvm, fmt) + +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); + +void vcpu_load(struct kvm_vcpu *vcpu); +void vcpu_put(struct kvm_vcpu *vcpu); + +void decache_vcpus_on_cpu(int cpu); + + +int kvm_init(void *opaque, unsigned int vcpu_size, + struct module *module); +void kvm_exit(void); + +#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) +#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) +static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); + +extern struct page *bad_page; + +int is_error_page(struct page *page); +int kvm_is_error_hva(unsigned long addr); +int kvm_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + int user_alloc); +int __kvm_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + int user_alloc); +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc); +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); +struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); +void kvm_release_page_clean(struct page *page); +void kvm_release_page_dirty(struct page *page); +int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, + int len); +int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len); +int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); +int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, + int offset, int len); +int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, + unsigned long len); +int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); +int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); +int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +void 
mark_page_dirty(struct kvm *kvm, gfn_t gfn); + +void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_resched(struct kvm_vcpu *vcpu); +void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); +void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); +void kvm_flush_remote_tlbs(struct kvm *kvm); + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); + +int kvm_dev_ioctl_check_extension(long ext); + +int kvm_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log, int *is_dirty); +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log); + +int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct + kvm_userspace_memory_region *mem, + int user_alloc); +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); +void kvm_arch_destroy_vm(struct kvm *kvm); + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr); + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs); +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs); +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg); +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); + +int kvm_arch_init(void *opaque); +void kvm_arch_exit(void); + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); +void 
kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); + +int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_arch_hardware_enable(void *garbage); +void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_setup(void); +void kvm_arch_hardware_unsetup(void); +void kvm_arch_check_processor_compat(void *rtn); +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); + +void kvm_free_physmem(struct kvm *kvm); + +struct kvm *kvm_arch_create_vm(void); +void kvm_arch_destroy_vm(struct kvm *kvm); + +int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +int kvm_cpu_has_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_kick(struct kvm_vcpu *vcpu); + +static inline void kvm_guest_enter(void) +{ + account_system_vtime(current); + current->flags |= PF_VCPU; +} + +static inline void kvm_guest_exit(void) +{ + account_system_vtime(current); + current->flags &= ~PF_VCPU; +} + +static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + return slot - kvm->memslots; +} + +static inline gpa_t gfn_to_gpa(gfn_t gfn) +{ + return (gpa_t)gfn << PAGE_SHIFT; +} + +static inline void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) +{ + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); +} + +enum kvm_stat_kind { + KVM_STAT_VM, + KVM_STAT_VCPU, +}; + +struct kvm_stats_debugfs_item { + const char *name; + int offset; + enum kvm_stat_kind kind; + struct dentry *dentry; +}; +extern struct kvm_stats_debugfs_item debugfs_entries[]; + +#endif diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 3b292565a69..5497aac0d2f 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -2,72 +2,30 @@ #define __LINUX_KVM_PARA_H /* - * Guest OS interface for KVM paravirtualization - * - * Note: this interface is totally experimental, and is 
certain to change - * as we make progress. + * This header file provides a method for making a hypercall to the host + * Architectures should define: + * - kvm_hypercall0, kvm_hypercall1... + * - kvm_arch_para_features + * - kvm_para_available */ -/* - * Per-VCPU descriptor area shared between guest and host. Writable to - * both guest and host. Registered with the host by the guest when - * a guest acknowledges paravirtual mode. - * - * NOTE: all addresses are guest-physical addresses (gpa), to make it - * easier for the hypervisor to map between the various addresses. - */ -struct kvm_vcpu_para_state { - /* - * API version information for compatibility. If there's any support - * mismatch (too old host trying to execute too new guest) then - * the host will deny entry into paravirtual mode. Any other - * combination (new host + old guest and new host + new guest) - * is supposed to work - new host versions will support all old - * guest API versions. - */ - u32 guest_version; - u32 host_version; - u32 size; - u32 ret; - - /* - * The address of the vm exit instruction (VMCALL or VMMCALL), - * which the host will patch according to the CPU model the - * VM runs on: - */ - u64 hypercall_gpa; - -} __attribute__ ((aligned(PAGE_SIZE))); - -#define KVM_PARA_API_VERSION 1 - -/* - * This is used for an RDMSR's ECX parameter to probe for a KVM host. - * Hopefully no CPU vendor will use up this number. This is placed well - * out of way of the typical space occupied by CPU vendors' MSR indices, - * and we think (or at least hope) it wont be occupied in the future - * either. - */ -#define MSR_KVM_API_MAGIC 0x87655678 +/* Return values for hypercalls */ +#define KVM_ENOSYS 1000 -#define KVM_EINVAL 1 +#define KVM_HC_VAPIC_POLL_IRQ 1 /* - * Hypercall calling convention: - * - * Each hypercall may have 0-6 parameters. 
- * - * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1 - * - * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention - * order: RDI, RSI, RDX, RCX, R8, R9. - * - * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP. - * (the first 3 are according to the gcc regparm calling convention) - * - * No registers are clobbered by the hypercall, except that the - * return value is in RAX. + * hypercalls use architecture specific */ -#define __NR_hypercalls 0 +#include <asm/kvm_para.h> + +#ifdef __KERNEL__ +static inline int kvm_para_has_feature(unsigned int feature) +{ + if (kvm_arch_para_features() & (1UL << feature)) + return 1; + return 0; +} +#endif /* __KERNEL__ */ +#endif /* __LINUX_KVM_PARA_H */ -#endif diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h new file mode 100644 index 00000000000..1c4e46decb2 --- /dev/null +++ b/include/linux/kvm_types.h @@ -0,0 +1,54 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#ifndef __KVM_TYPES_H__ +#define __KVM_TYPES_H__ + +#include <asm/types.h> + +/* + * Address types: + * + * gva - guest virtual address + * gpa - guest physical address + * gfn - guest frame number + * hva - host virtual address + * hpa - host physical address + * hfn - host frame number + */ + +typedef unsigned long gva_t; +typedef u64 gpa_t; +typedef unsigned long gfn_t; + +typedef unsigned long hva_t; +typedef u64 hpa_t; +typedef unsigned long hfn_t; + +struct kvm_pio_request { + unsigned long count; + int cur_count; + struct page *guest_pages[2]; + unsigned guest_page_offset; + int in; + int port; + int size; + int string; + int down; + int rep; +}; + +#endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index ff203dd0291..3faf599ea58 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -13,6 +13,10 @@ #define asmlinkage CPP_ASMLINKAGE #endif +#ifndef asmregparm +# define asmregparm +#endif + #ifndef prevent_tail_call # define prevent_tail_call(ret) do { } while (0) #endif @@ -53,6 +57,10 @@ .size name, .-name #endif +/* If symbol 'name' is treated as a subroutine (gets called, and returns) + * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of + * static analysis tools such as stack depth analyzer. 
+ */ #ifndef ENDPROC #define ENDPROC(name) \ .type name, @function; \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1897ca223ec..1bba6789a50 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ -#ifndef CONFIG_DEBUG_PAGEALLOC +#ifdef CONFIG_DEBUG_PAGEALLOC +extern int debug_pagealloc_enabled; + +extern void kernel_map_pages(struct page *page, int numpages, int enable); + +static inline void enable_debug_pagealloc(void) +{ + debug_pagealloc_enabled = 1; +} +#else static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} +static inline void enable_debug_pagealloc(void) +{ +} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); @@ -1146,6 +1158,7 @@ extern int randomize_va_space; #endif const char * arch_vma_name(struct vm_area_struct *vma); +void print_vma_addr(char *prefix, unsigned long rip); struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c6953134836..41f6f28690f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2085,6 +2085,13 @@ #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f +#define PCI_VENDOR_ID_RDC 0x17f3 +#define PCI_DEVICE_ID_RDC_R6020 0x6020 +#define PCI_DEVICE_ID_RDC_R6030 0x6030 +#define PCI_DEVICE_ID_RDC_R6040 0x6040 +#define PCI_DEVICE_ID_RDC_R6060 0x6060 +#define PCI_DEVICE_ID_RDC_R6061 0x6061 + #define PCI_VENDOR_ID_LENOVO 0x17aa #define PCI_VENDOR_ID_ARECA 0x17d3 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 926adaae0f9..00412bb494c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,6 +9,30 @@ #include <asm/percpu.h> +#ifndef PER_CPU_ATTRIBUTES +#define PER_CPU_ATTRIBUTES +#endif + +#ifdef CONFIG_SMP +#define DEFINE_PER_CPU(type, name) \ 
+ __attribute__((__section__(".data.percpu"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp +#else +#define DEFINE_PER_CPU(type, name) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) +#endif + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM #ifdef CONFIG_MODULES diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 3ea5750a0f7..515bff053de 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -129,6 +129,81 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); #define force_successful_syscall_return() do { } while (0) #endif +/* + * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__. + * + * These do-nothing inlines are used when the arch does not + * implement single-step. The kerneldoc comments are here + * to document the interface for all arch definitions. + */ + +#ifndef arch_has_single_step +/** + * arch_has_single_step - does this CPU support user-mode single-step? + * + * If this is defined, then there must be function declarations or + * inlines for user_enable_single_step() and user_disable_single_step(). + * arch_has_single_step() should evaluate to nonzero iff the machine + * supports instruction single-step for user mode. + * It can be a constant or it can test a CPU feature bit. 
+ */ +#define arch_has_single_step() (0) + +/** + * user_enable_single_step - single-step in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_single_step() has returned nonzero. + * Set @task so that when it returns to user mode, it will trap after the + * next single instruction executes. If arch_has_block_step() is defined, + * this must clear the effects of user_enable_block_step() too. + */ +static inline void user_enable_single_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} + +/** + * user_disable_single_step - cancel user-mode single-step + * @task: either current or a task stopped in %TASK_TRACED + * + * Clear @task of the effects of user_enable_single_step() and + * user_enable_block_step(). This can be called whether or not either + * of those was ever called on @task, and even if arch_has_single_step() + * returned zero. + */ +static inline void user_disable_single_step(struct task_struct *task) +{ +} +#endif /* arch_has_single_step */ + +#ifndef arch_has_block_step +/** + * arch_has_block_step - does this CPU support user-mode block-step? + * + * If this is defined, then there must be a function declaration or inline + * for user_enable_block_step(), and arch_has_single_step() must be defined + * too. arch_has_block_step() should evaluate to nonzero iff the machine + * supports step-until-branch for user mode. It can be a constant or it + * can test a CPU feature bit. + */ +#define arch_has_block_step() (0) + +/** + * user_enable_block_step - step until branch in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_block_step() has returned nonzero, + * and will never be called when single-instruction stepping is being used. + * Set @task so that when it returns to user mode, it will trap after the + * next branch or trap taken. 
+ */ +static inline void user_enable_block_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} +#endif /* arch_has_block_step */ + #endif #endif diff --git a/include/linux/regset.h b/include/linux/regset.h new file mode 100644 index 00000000000..8abee655622 --- /dev/null +++ b/include/linux/regset.h @@ -0,0 +1,368 @@ +/* + * User-mode machine state access + * + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + */ + +#ifndef _LINUX_REGSET_H +#define _LINUX_REGSET_H 1 + +#include <linux/compiler.h> +#include <linux/types.h> +#include <linux/uaccess.h> +struct task_struct; +struct user_regset; + + +/** + * user_regset_active_fn - type of @active function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * + * Return -%ENODEV if not available on the hardware found. + * Return %0 if no interesting state in this thread. + * Return >%0 number of @size units of interesting state. + * Any get call fetching state beyond that number will + * see the default initialization state for this data, + * so a caller that knows what the default state is need + * not copy it all out. + * This call is optional; the pointer is %NULL if there + * is no inexpensive check to yield a value < @n. 
+ */ +typedef int user_regset_active_fn(struct task_struct *target, + const struct user_regset *regset); + +/** + * user_regset_get_fn - type of @get function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy into + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy into + * + * Fetch register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. + */ +typedef int user_regset_get_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); + +/** + * user_regset_set_fn - type of @set function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy from + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy from + * + * Store register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. 
+ */ +typedef int user_regset_set_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/** + * user_regset_writeback_fn - type of @writeback function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @immediate: zero if writeback at completion of next context switch is OK + * + * This call is optional; usually the pointer is %NULL. When + * provided, there is some user memory associated with this regset's + * hardware, such as memory backing cached register data on register + * window machines; the regset's data controls what user memory is + * used (e.g. via the stack pointer value). + * + * Write register data back to user memory. If the @immediate flag + * is nonzero, it must be written to the user memory so uaccess or + * access_process_vm() can see it when this call returns; if zero, + * then it must be written back by the time the task completes a + * context switch (as synchronized with wait_task_inactive()). + * Return %0 on success or if there was nothing to do, -%EFAULT for + * a memory problem (bad stack pointer or whatever), or -%EIO for a + * hardware problem. + */ +typedef int user_regset_writeback_fn(struct task_struct *target, + const struct user_regset *regset, + int immediate); + +/** + * struct user_regset - accessible thread CPU state + * @n: Number of slots (registers). + * @size: Size in bytes of a slot (register). + * @align: Required alignment, in bytes. + * @bias: Bias from natural indexing. + * @core_note_type: ELF note @n_type value used in core dumps. + * @get: Function to fetch values. + * @set: Function to store values. + * @active: Function to report if regset is active, or %NULL. + * @writeback: Function to write data back to user memory, or %NULL. + * + * This data structure describes a machine resource we call a register set. 
+ * This is part of the state of an individual thread, not necessarily + * actual CPU registers per se. A register set consists of a number of + * similar slots, given by @n. Each slot is @size bytes, and aligned to + * @align bytes (which is at least @size). + * + * These functions must be called only on the current thread or on a + * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are + * guaranteed will not be woken up and return to user mode, and that we + * have called wait_task_inactive() on. (The target thread always might + * wake up for SIGKILL while these functions are working, in which case + * that thread's user_regset state might be scrambled.) + * + * The @pos argument must be aligned according to @align; the @count + * argument must be a multiple of @size. These functions are not + * responsible for checking for invalid arguments. + * + * When there is a natural value to use as an index, @bias gives the + * difference between the natural index and the slot index for the + * register set. For example, x86 GDT segment descriptors form a regset; + * the segment selector produces a natural index, but only a subset of + * that index space is available as a regset (the TLS slots); subtracting + * @bias from a segment selector index value computes the regset slot. + * + * If nonzero, @core_note_type gives the n_type field (NT_* value) + * of the core file note in which this regset's data appears. + * NT_PRSTATUS is a special case in that the regset data starts at + * offsetof(struct elf_prstatus, pr_reg) into the note data; that is + * part of the per-machine ELF formats userland knows about. In + * other cases, the core file note contains exactly the whole regset + * (@n * @size) and nothing else. The core file note is normally + * omitted when there is an @active function and it returns zero. 
+ */ +struct user_regset { + user_regset_get_fn *get; + user_regset_set_fn *set; + user_regset_active_fn *active; + user_regset_writeback_fn *writeback; + unsigned int n; + unsigned int size; + unsigned int align; + unsigned int bias; + unsigned int core_note_type; +}; + +/** + * struct user_regset_view - available regsets + * @name: Identifier, e.g. UTS_MACHINE string. + * @regsets: Array of @n regsets available in this view. + * @n: Number of elements in @regsets. + * @e_machine: ELF header @e_machine %EM_* value written in core dumps. + * @e_flags: ELF header @e_flags value written in core dumps. + * @ei_osabi: ELF header @e_ident[%EI_OSABI] value written in core dumps. + * + * A regset view is a collection of regsets (&struct user_regset, + * above). This describes all the state of a thread that can be seen + * from a given architecture/ABI environment. More than one view might + * refer to the same &struct user_regset, or more than one regset + * might refer to the same machine-specific state in the thread. For + * example, a 32-bit thread's state could be examined from the 32-bit + * view or from the 64-bit view. Either method reaches the same thread + * register state, doing appropriate widening or truncation. + */ +struct user_regset_view { + const char *name; + const struct user_regset *regsets; + unsigned int n; + u32 e_flags; + u16 e_machine; + u8 ei_osabi; +}; + +/* + * This is documented here rather than at the definition sites because its + * implementation is machine-dependent but its interface is universal. + */ +/** + * task_user_regset_view - Return the process's native regset view. + * @tsk: a thread of the process in question + * + * Return the &struct user_regset_view that is native for the given process. + * For example, what it would access when it called ptrace(). + * Throughout the life of the process, this only changes at exec. 
+ */ +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); + + +/* + * These are helpers for writing regset get/set functions in arch code. + * Because @start_pos and @end_pos are always compile-time constants, + * these are inlined into very little code though they look large. + * + * Use one or more calls sequentially for each chunk of regset data stored + * contiguously in memory. Call with constants for @start_pos and @end_pos, + * giving the range of byte positions in the regset that data corresponds + * to; @end_pos can be -1 if this chunk is at the end of the regset layout. + * Each call updates the arguments to point past its chunk. + */ + +static inline int user_regset_copyout(unsigned int *pos, unsigned int *count, + void **kbuf, + void __user **ubuf, const void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(*kbuf, data, copy); + *kbuf += copy; + } else if (__copy_to_user(*ubuf, data, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, + const void **kbuf, + const void __user **ubuf, void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? 
*count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(data, *kbuf, copy); + *kbuf += copy; + } else if (__copy_from_user(data, *ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +/* + * These two parallel the two above, but for portions of a regset layout + * that always read as all-zero or for which writes are ignored. + */ +static inline int user_regset_copyout_zero(unsigned int *pos, + unsigned int *count, + void **kbuf, void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + if (*kbuf) { + memset(*kbuf, 0, copy); + *kbuf += copy; + } else if (__clear_user(*ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? 
*count + : min(*count, end_pos - *pos)); + if (*kbuf) + *kbuf += copy; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +/** + * copy_regset_to_user - fetch a thread's user_regset data into user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy into + */ +static inline int copy_regset_to_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_WRITE, data, size)) + return -EIO; + + return regset->get(target, regset, offset, size, NULL, data); +} + +/** + * copy_regset_from_user - store into thread's user_regset data from user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy from + */ +static inline int copy_regset_from_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + const void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_READ, data, size)) + return -EIO; + + return regset->set(target, regset, offset, size, NULL, data); +} + + +#endif /* <linux/regset.h> */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0546e884e..9d4797609aa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void); /* * Does a critical section need to be broken due to another - * task waiting?: 
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define need_lockbreak(lock) ((lock)->break_lock) -#else -# define need_lockbreak(lock) 0 -#endif - -/* - * Does a critical section need to be broken due to another - * task waiting or preemption being signalled: - */ -static inline int lock_need_resched(spinlock_t *lock) +static inline int spin_needbreak(spinlock_t *lock) { - if (need_lockbreak(lock) || need_resched()) - return 1; +#ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +#else return 0; +#endif } /* diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 6080f73fc85..8c2cc4c0252 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -120,16 +120,35 @@ void selinux_get_task_sid(struct task_struct *tsk, u32 *sid); int selinux_string_to_sid(char *str, u32 *sid); /** - * selinux_relabel_packet_permission - check permission to relabel a packet - * @sid: ID value to be applied to network packet (via SECMARK, most likely) + * selinux_secmark_relabel_packet_permission - secmark permission check + * @sid: SECMARK ID value to be applied to network packet * - * Returns 0 if the current task is allowed to label packets with the - * supplied security ID. Note that it is implicit that the packet is always - * being relabeled from the default unlabled value, and that the access - * control decision is made in the AVC. + * Returns 0 if the current task is allowed to set the SECMARK label of + * packets with the supplied security ID. Note that it is implicit that + * the packet is always being relabeled from the default unlabeled value, + * and that the access control decision is made in the AVC. 
*/ -int selinux_relabel_packet_permission(u32 sid); +int selinux_secmark_relabel_packet_permission(u32 sid); +/** + * selinux_secmark_refcount_inc - increments the secmark use counter + * + * SELinux keeps track of the current SECMARK targets in use so it knows + * when to apply SECMARK label access checks to network packets. This + * function increments this reference count to indicate that a new SECMARK + * target has been configured. + */ +void selinux_secmark_refcount_inc(void); + +/** + * selinux_secmark_refcount_dec - decrements the secmark use counter + * + * SELinux keeps track of the current SECMARK targets in use so it knows + * when to apply SECMARK label access checks to network packets. This + * function decrements this reference count to indicate that one of the + * existing SECMARK targets has been removed/flushed. + */ +void selinux_secmark_refcount_dec(void); #else static inline int selinux_audit_rule_init(u32 field, u32 op, @@ -184,11 +203,21 @@ static inline int selinux_string_to_sid(const char *str, u32 *sid) return 0; } -static inline int selinux_relabel_packet_permission(u32 sid) +static inline int selinux_secmark_relabel_packet_permission(u32 sid) { return 0; } +static inline void selinux_secmark_refcount_inc(void) +{ + return; +} + +static inline void selinux_secmark_refcount_dec(void) +{ + return; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index c25e66bcecf..55232ccf9cf 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -78,6 +78,8 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); */ void smp_prepare_boot_cpu(void); +extern unsigned int setup_max_cpus; + #else /* !SMP */ /* diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index c376f3b36c8..124449733c5 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,6 +120,12 @@ do { \ #define spin_is_locked(lock) 
__raw_spin_is_locked(&(lock)->raw_lock) +#ifdef CONFIG_GENERIC_LOCKBREAK +#define spin_is_contended(lock) ((lock)->break_lock) +#else +#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index f6a3a951b79..68d88f71f1a 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,7 +19,7 @@ typedef struct { raw_spinlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -35,7 +35,7 @@ typedef struct { typedef struct { raw_rwlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index ea54c4c9a4e..938234c4a99 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ +#define __raw_spin_is_contended(lock) (((void)(lock), 0)) + #define __raw_read_can_lock(lock) (((void)(lock), 1)) #define __raw_write_can_lock(lock) (((void)(lock), 1)) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4360e081695..40280df2a3d 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -211,9 +211,6 @@ static inline int hibernate(void) { return -ENOSYS; } #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); -struct saved_context; -void __save_processor_state(struct saved_context *ctxt); -void __restore_processor_state(struct saved_context *ctxt); /* kernel/power/main.c */ extern struct blocking_notifier_head pm_chain_head; diff 
--git a/include/linux/swap.h b/include/linux/swap.h index 4f3838adbb3..2c3ce4c69b2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -6,6 +6,7 @@ #include <linux/mmzone.h> #include <linux/list.h> #include <linux/sched.h> +#include <linux/pagemap.h> #include <asm/atomic.h> #include <asm/page.h> diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9c4ad755d7e..dfbdfb9836f 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,27 +42,27 @@ extern long do_no_restart_syscall(struct restart_block *parm); static inline void set_ti_thread_flag(struct thread_info *ti, int flag) { - set_bit(flag,&ti->flags); + set_bit(flag, (unsigned long *)&ti->flags); } static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) { - clear_bit(flag,&ti->flags); + clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_set_bit(flag,&ti->flags); + return test_and_set_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_clear_bit(flag,&ti->flags); + return test_and_clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_ti_thread_flag(struct thread_info *ti, int flag) { - return test_bit(flag,&ti->flags); + return test_bit(flag, (unsigned long *)&ti->flags); } #define set_thread_flag(flag) \ diff --git a/include/linux/tick.h b/include/linux/tick.h index f4a1395e05f..0fadf95debe 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -51,8 +51,10 @@ struct tick_sched { unsigned long idle_jiffies; unsigned long idle_calls; unsigned long idle_sleeps; + int idle_active; ktime_t idle_entrytime; ktime_t idle_sleeptime; + ktime_t idle_lastupdate; ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; @@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(void); extern void 
tick_nohz_restart_sched_tick(void); extern void tick_nohz_update_jiffies(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern void tick_nohz_stop_idle(int cpu); +extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } @@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } +static inline void tick_nohz_stop_idle(int cpu) { } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } # endif /* !NO_HZ */ #endif diff --git a/include/linux/timer.h b/include/linux/timer.h index 78cf899b440..de0e71359ed 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -5,7 +5,7 @@ #include <linux/ktime.h> #include <linux/stddef.h> -struct tvec_t_base_s; +struct tvec_base; struct timer_list { struct list_head entry; @@ -14,7 +14,7 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; - struct tvec_t_base_s *base; + struct tvec_base *base; #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -22,7 +22,7 @@ struct timer_list { #endif }; -extern struct tvec_t_base_s boot_tvec_bases; +extern struct tvec_base boot_tvec_bases; #define TIMER_INITIALIZER(_function, _expires, _data) { \ .function = (_function), \ |