From 3c2a769de7955ff81818b49d388dd771bf6ae29d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 14 Dec 2016 15:08:37 -0200 Subject: Documentation, x86, resctrl: Recommend locking for resctrlfs Concurrent write or read/write access from applications to the resctrlfs directory can result in incorrect readouts or setups. Recommend a standard locking scheme for applications to use. Signed-off-by: Marcelo Tosatti Cc: Fenghua Yu Link: http://lkml.kernel.org/r/20161214170835.GA16924@amt.cnet Signed-off-by: Thomas Gleixner --- Documentation/x86/intel_rdt_ui.txt | 114 +++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index d918d268cd72..51cf6fa5591f 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -212,3 +212,117 @@ Finally we move core 4-7 over to the new group and make sure that the kernel and the tasks running there get 50% of the cache. # echo C0 > p0/cpus + +4) Locking between applications + +Certain operations on the resctrl filesystem, composed of read/writes +to/from multiple files, must be atomic. + +As an example, the allocation of an exclusive reservation of L3 cache +involves: + + 1. Read the cbmmasks from each directory + 2. Find a contiguous set of bits in the global CBM bitmask that is clear + in any of the directory cbmmasks + 3. Create a new directory + 4. Set the bits found in step 2 to the new directory "schemata" file + +If two applications attempt to allocate space concurrently then they can +end up allocating the same bits so the reservations are shared instead of +exclusive. + +To coordinate atomic operations on the resctrlfs and to avoid the problem +above, the following locking procedure is recommended: + +Locking is based on flock, which is available in libc and also as a shell +script command + +Write lock: + + A) Take flock(LOCK_EX) on /sys/fs/resctrl + B) Read/write the directory structure. + C) funlock + +Read lock: + + A) Take flock(LOCK_SH) on /sys/fs/resctrl + B) If success read the directory structure. + C) funlock + +Example with bash: + +# Atomically read directory structure +$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl + +# Read directory contents and create new subdirectory + +$ cat create-dir.sh +find /sys/fs/resctrl/ > output.txt +mask = function-of(output.txt) +mkdir /sys/fs/resctrl/newres/ +echo mask > /sys/fs/resctrl/newres/schemata + +$ flock /sys/fs/resctrl/ ./create-dir.sh + +Example with C: + +/* + * Example code do take advisory locks + * before accessing resctrl filesystem + */ +#include +#include + +void resctrl_take_shared_lock(int fd) +{ + int ret; + + /* take shared lock on resctrl filesystem */ + ret = flock(fd, LOCK_SH); + if (ret) { + perror("flock"); + exit(-1); + } +} + +void resctrl_take_exclusive_lock(int fd) +{ + int ret; + + /* release lock on resctrl filesystem */ + ret = flock(fd, LOCK_EX); + if (ret) { + perror("flock"); + exit(-1); + } +} + +void resctrl_release_lock(int fd) +{ + int ret; + + /* take shared lock on resctrl filesystem */ + ret = flock(fd, LOCK_UN); + if (ret) { + perror("flock"); + exit(-1); + } +} + +void main(void) +{ + int fd, ret; + + fd = open("/sys/fs/resctrl", O_DIRECTORY); + if (fd == -1) { + perror("open"); + exit(-1); + } + resctrl_take_shared_lock(fd); + /* code to read directory contents */ + resctrl_release_lock(fd); + + resctrl_take_exclusive_lock(fd); + /* code to read and write directory contents */ + resctrl_release_lock(fd); +} -- cgit v1.2.3 From b4ed1d15b453c86b4b9362128bd7a0ecd95a105c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 25 Dec 2016 14:35:51 +0000 Subject: x86/e820: Make e820_search_gap() static and remove unused variables e820_search_gap() is just used locally now and the 'start_addr' and 'end_addr' parameters are fixed values. Also, 'gapstart' is not checked in this function anymore. So make the function static and remove those unused variables. Signed-off-by: Wei Yang Acked-by: Yinghai Lu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/1482676551-11411-1-git-send-email-richard.weiyang@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/e820.h | 2 -- arch/x86/kernel/e820.c | 16 +++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index ec23d8e1297c..67313f3a9874 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype); extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(u64 phys_addr, u32 data_len); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 90e8dde3ec26..46f2afd3577a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -580,24 +580,19 @@ static void __init update_e820_saved(void) } #define MAX_GAP_END 0x100000000ull /* - * Search for a gap in the e820 memory space from start_addr to end_addr. + * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) +static int __init e820_search_gap(unsigned long *gapstart, + unsigned long *gapsize) { - unsigned long long last; + unsigned long long last = MAX_GAP_END; int i = e820->nr_map; int found = 0; - last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; - while (--i >= 0) { unsigned long long start = e820->map[i].addr; unsigned long long end = start + e820->map[i].size; - if (end < start_addr) - continue; - /* * Since "last" is at most 4GB, we know we'll * fit in 32 bits if this condition is true @@ -628,9 +623,8 @@ __init void e820_setup_gap(void) unsigned long gapstart, gapsize; int found; - gapstart = 0x10000000; gapsize = 0x400000; - found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); + found = e820_search_gap(&gapstart, &gapsize); #ifdef CONFIG_X86_64 if (!found) { -- cgit v1.2.3 From c4158ff536439619fa342810cc575ae2c809f03f Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 4 Jan 2017 12:20:33 +0100 Subject: x86/irq, trace: Add __irq_entry annotation to x86's platform IRQ handlers This patch adds the __irq_entry annotation to the default x86 platform IRQ handlers. ftrace's function_graph tracer uses the __irq_entry annotation to notify the entry and return of IRQ handlers. For example, before the patch: 354549.667252 | 3) d..1 | default_idle_call() { 354549.667252 | 3) d..1 | arch_cpu_idle() { 354549.667253 | 3) d..1 | default_idle() { 354549.696886 | 3) d..1 | smp_trace_reschedule_interrupt() { 354549.696886 | 3) d..1 | irq_enter() { 354549.696886 | 3) d..1 | rcu_irq_enter() { After the patch: 366416.254476 | 3) d..1 | arch_cpu_idle() { 366416.254476 | 3) d..1 | default_idle() { 366416.261566 | 3) d..1 ==========> | 366416.261566 | 3) d..1 | smp_trace_reschedule_interrupt() { 366416.261566 | 3) d..1 | irq_enter() { 366416.261566 | 3) d..1 | rcu_irq_enter() { KASAN also uses this annotation. The smp_apic_timer_interrupt() was already annotated. Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt (VMware) Cc: Aaron Lu Cc: Andrew Morton Cc: Baoquan He Cc: Borislav Petkov Cc: Claudio Fontana Cc: Denys Vlasenko Cc: Dou Liyang Cc: Gu Zheng Cc: Hidehiro Kawai Cc: Linus Torvalds Cc: Nicolai Stange Cc: Peter Zijlstra (Intel) Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/059fdf437c2f0c09b13c18c8fe4e69999d3ffe69.1483528431.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 6 ++++-- arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++-- arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/irq_work.c | 5 +++-- arch/x86/kernel/smp.c | 15 +++++++++------ 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..30b122987906 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1864,14 +1864,14 @@ static void __smp_spurious_interrupt(u8 vector) "should never happen.\n", vector, smp_processor_id()); } -__visible void smp_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } -__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) { u8 vector = ~regs->orig_ax; @@ -1922,14 +1922,14 @@ static void __smp_error_interrupt(struct pt_regs *regs) } -__visible void smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { entering_irq(); __smp_error_interrupt(regs); exiting_irq(); } -__visible void smp_trace_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5d30c5e42bb1..f3557a1eb562 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(data); } -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..9e655292cf10 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -814,14 +814,14 @@ static inline void __smp_deferred_error_interrupt(void) deferred_error_int_vector(); } -asmlinkage __visible void smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) { entering_irq(); __smp_deferred_error_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..772d9400930d 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -404,14 +404,16 @@ static inline void __smp_thermal_interrupt(void) smp_thermal_vector(); } -asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); __smp_thermal_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_trace_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 9beb092d68a5..bb0e75eed10a 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void) mce_threshold_vector(); } -asmlinkage __visible void smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) { entering_irq(); __smp_threshold_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7c6e9ffe4424..4d8183b5f113 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void) x86_platform_ipi_callback(); } -__visible void smp_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) } #endif -__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 3512ba607361..275487872be2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,6 +9,7 @@ #include #include #include +#include static inline void __smp_irq_work_interrupt(void) { @@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void) irq_work_run(); } -__visible void smp_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } -__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 68f8cc222f25..d3c66a15bbde 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void) scheduler_ipi(); } -__visible void smp_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); __smp_reschedule_interrupt(); @@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* * Need to call irq_enter() before calling the trace point. @@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); -- cgit v1.2.3 From 6606021401032919c559a829a5d273ba1741b434 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 26 Dec 2016 19:20:50 -0800 Subject: selftests/x86: Add a selftest for SYSRET to noncanonical addresses SYSRET to a noncanonical address will blow up on Intel CPUs. Linux needs to prevent this from happening in two major cases, and the criteria will become more complicated when support for larger virtual address spaces is added. A fast-path SYSCALL will fall through to the following instruction using SYSRET without any particular checking. To prevent fall-through to a noncanonical address, Linux prevents the highest canonical page from being mapped. This test case checks a variety of possible maximum addresses to make sure that either we can't map code there or that SYSCALL fall-through works. A slow-path system call can return anywhere. Linux needs to make sure that, if the return address is non-canonical, it won't use SYSRET. This test cases causes sigreturn() to return to a variety of addresses (with RCX == RIP) and makes sure that nothing explodes. Some of this code comes from Kirill Shutemov. Kirill reported the following output with 5-level paging enabled: [RUN] sigreturn to 0x800000000000 [OK] Got SIGSEGV at RIP=0x800000000000 [RUN] sigreturn to 0x1000000000000 [OK] Got SIGSEGV at RIP=0x1000000000000 [RUN] sigreturn to 0x2000000000000 [OK] Got SIGSEGV at RIP=0x2000000000000 [RUN] sigreturn to 0x4000000000000 [OK] Got SIGSEGV at RIP=0x4000000000000 [RUN] sigreturn to 0x8000000000000 [OK] Got SIGSEGV at RIP=0x8000000000000 [RUN] sigreturn to 0x10000000000000 [OK] Got SIGSEGV at RIP=0x10000000000000 [RUN] sigreturn to 0x20000000000000 [OK] Got SIGSEGV at RIP=0x20000000000000 [RUN] sigreturn to 0x40000000000000 [OK] Got SIGSEGV at RIP=0x40000000000000 [RUN] sigreturn to 0x80000000000000 [OK] Got SIGSEGV at RIP=0x80000000000000 [RUN] sigreturn to 0x100000000000000 [OK] Got SIGSEGV at RIP=0x100000000000000 [RUN] sigreturn to 0x200000000000000 [OK] Got SIGSEGV at RIP=0x200000000000000 [RUN] sigreturn to 0x400000000000000 [OK] Got SIGSEGV at RIP=0x400000000000000 [RUN] sigreturn to 0x800000000000000 [OK] Got SIGSEGV at RIP=0x800000000000000 [RUN] sigreturn to 0x1000000000000000 [OK] Got SIGSEGV at RIP=0x1000000000000000 [RUN] sigreturn to 0x2000000000000000 [OK] Got SIGSEGV at RIP=0x2000000000000000 [RUN] sigreturn to 0x4000000000000000 [OK] Got SIGSEGV at RIP=0x4000000000000000 [RUN] sigreturn to 0x8000000000000000 [OK] Got SIGSEGV at RIP=0x8000000000000000 [RUN] Trying a SYSCALL that falls through to 0x7fffffffe000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7ffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x800000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xfffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x2000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x3fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x4000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x8000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x10000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x20000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x3ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x40000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x80000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xfffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x100000000000000 [OK] mremap to 0xfffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x1fffffffffff000 [OK] mremap to 0x1ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x200000000000000 [OK] mremap to 0x1fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x3fffffffffff000 [OK] mremap to 0x3ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x400000000000000 [OK] mremap to 0x3fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x7fffffffffff000 [OK] mremap to 0x7ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x800000000000000 [OK] mremap to 0x7fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0xffffffffffff000 [OK] mremap to 0xfffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x1000000000000000 [OK] mremap to 0xffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x1ffffffffffff000 [OK] mremap to 0x1fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x2000000000000000 [OK] mremap to 0x1ffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x3ffffffffffff000 [OK] mremap to 0x3fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x4000000000000000 [OK] mremap to 0x3ffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x7ffffffffffff000 [OK] mremap to 0x7fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x8000000000000000 [OK] mremap to 0x7ffffffffffff000 failed Signed-off-by: Andy Lutomirski Acked-by: Kirill A. Shutemov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e70bd9a3f90657ba47b755100a20475d038fa26b.1482808435.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sysret_rip.c | 195 +++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/sysret_rip.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 8c1cb423cfe6..25d4067c11e4 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -10,7 +10,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c new file mode 100644 index 000000000000..d85ec5b3671c --- /dev/null +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -0,0 +1,195 @@ +/* + * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls + * Copyright (c) 2014-2016 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +asm ( + ".pushsection \".text\", \"ax\"\n\t" + ".balign 4096\n\t" + "test_page: .globl test_page\n\t" + ".fill 4094,1,0xcc\n\t" + "test_syscall_insn:\n\t" + "syscall\n\t" + ".ifne . - test_page - 4096\n\t" + ".error \"test page is not one page long\"\n\t" + ".endif\n\t" + ".popsection" + ); + +extern const char test_page[]; +static void const *current_test_page_addr = test_page; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs; + +static volatile unsigned long rip; + +static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (rip != ctx->uc_mcontext.gregs[REG_RIP]) { + printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n", + rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]); + fflush(stdout); + _exit(1); + } + + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip); +} + +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + /* Set IP and CX to match so that SYSRET can happen. */ + ctx->uc_mcontext.gregs[REG_RIP] = rip; + ctx->uc_mcontext.gregs[REG_RCX] = rip; + + /* R11 and EFLAGS should already match. */ + assert(ctx->uc_mcontext.gregs[REG_EFL] == + ctx->uc_mcontext.gregs[REG_R11]); + + sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND); + + return; +} + +static void test_sigreturn_to(unsigned long ip) +{ + rip = ip; + printf("[RUN]\tsigreturn to 0x%lx\n", ip); + raise(SIGUSR1); +} + +static jmp_buf jmpbuf; + +static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (rip != ctx->uc_mcontext.gregs[REG_RIP]) { + printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n", + rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]); + fflush(stdout); + _exit(1); + } + + siglongjmp(jmpbuf, 1); +} + +static void test_syscall_fallthrough_to(unsigned long ip) +{ + void *new_address = (void *)(ip - 4096); + void *ret; + + printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip); + + ret = mremap((void *)current_test_page_addr, 4096, 4096, + MREMAP_MAYMOVE | MREMAP_FIXED, new_address); + if (ret == MAP_FAILED) { + if (ip <= (1UL << 47) - PAGE_SIZE) { + err(1, "mremap to %p", new_address); + } else { + printf("[OK]\tmremap to %p failed\n", new_address); + return; + } + } + + if (ret != new_address) + errx(1, "mremap malfunctioned: asked for %p but got %p\n", + new_address, ret); + + current_test_page_addr = new_address; + rip = ip; + + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid), + [syscall_insn] "rm" (ip - 2)); + errx(1, "[FAIL]\tSyscall trampoline returned"); + } + + printf("[OK]\tWe survived\n"); +} + +int main() +{ + /* + * When the kernel returns from a slow-path syscall, it will + * detect whether SYSRET is appropriate. If it incorrectly + * thinks that SYSRET is appropriate when RIP is noncanonical, + * it'll crash on Intel CPUs. + */ + sethandler(SIGUSR1, sigusr1, 0); + for (int i = 47; i < 64; i++) + test_sigreturn_to(1UL< Date: Thu, 5 Jan 2017 15:02:34 +0200 Subject: x86/platform/intel-mid: Remove Moorestown code The Moorestown support code was removed by: a8359e411eb ("x86/mid: Remove Intel Moorestown"). Remove this leftover as well. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105130235.177792-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 1 - .../platform/intel-mid/device_libs/platform_ipc.c | 9 ---- .../intel-mid/device_libs/platform_pmic_gpio.c | 54 ---------------------- 3 files changed, 64 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 90e4f2a6625b..4d8c14a783d5 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -12,7 +12,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o -obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c index a84b73d6c4a0..a428c051e7bb 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c @@ -57,12 +57,3 @@ void __init ipc_device_handler(struct sfi_device_table_entry *pentry, pdev->dev.platform_data = pdata; intel_scu_device_register(pdev); } - -static const struct devs_id pmic_audio_dev_id __initconst = { - .name = "pmic_audio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .device_handler = &ipc_device_handler, -}; - -sfi_device(pmic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c deleted file mode 100644 index e30cb62e3300..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * platform_pmic_gpio.c: PMIC GPIO platform data initialization file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "platform_ipc.h" - -static void __init *pmic_gpio_platform_data(void *info) -{ - static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; - int gpio_base = get_gpio_by_name("pmic_gpio_base"); - - if (gpio_base < 0) - gpio_base = 64; - pmic_gpio_pdata.gpio_base = gpio_base; - pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; - pmic_gpio_pdata.gpiointr = 0xffffeff8; - - return &pmic_gpio_pdata; -} - -static const struct devs_id pmic_gpio_spi_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_SPI, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, -}; - -static const struct devs_id pmic_gpio_ipc_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, - .device_handler = &ipc_device_handler -}; - -sfi_device(pmic_gpio_spi_dev_id); -sfi_device(pmic_gpio_ipc_dev_id); -- cgit v1.2.3 From a01b3391b542aaaed539f9d9d6d0d4d6502ab9c6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Jan 2017 15:02:35 +0200 Subject: x86/platform/intel-mid: Get rid of duplication of IPC handler There is no other device handler than ipc_device_handler() and sfi.c already has a handler for IPC devices. Replace a pointer to custom handler by a flag. Due to this change adjust sfi_handle_ipc_dev() to handle it instead of ipc_device_handler(). Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105130235.177792-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 4 +- arch/x86/platform/intel-mid/device_libs/Makefile | 1 - .../platform/intel-mid/device_libs/platform_ipc.c | 59 ---------------------- .../platform/intel-mid/device_libs/platform_ipc.h | 18 ------- .../intel-mid/device_libs/platform_msic_audio.c | 3 +- .../intel-mid/device_libs/platform_msic_battery.c | 3 +- .../intel-mid/device_libs/platform_msic_gpio.c | 3 +- .../intel-mid/device_libs/platform_msic_ocd.c | 3 +- .../device_libs/platform_msic_power_btn.c | 3 +- .../intel-mid/device_libs/platform_msic_thermal.c | 3 +- arch/x86/platform/intel-mid/sfi.c | 55 ++++++++++++-------- 11 files changed, 40 insertions(+), 115 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.c delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.h diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 49da9f497b90..91ead0cefa76 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -42,10 +42,8 @@ struct devs_id { char name[SFI_NAME_LEN + 1]; u8 type; u8 delay; + u8 msic; void *(*get_platform_data)(void *info); - /* Custom handler for devices */ - void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); }; #define sfi_device(i) \ diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 4d8c14a783d5..d4af7785844e 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -5,7 +5,6 @@ obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o # WiFi obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o # IPC Devices -obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c deleted file mode 100644 index a428c051e7bb..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * platform_ipc.c: IPC platform library file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include -#include -#include -#include -#include "platform_ipc.h" - -void __init ipc_device_handler(struct sfi_device_table_entry *pentry, - struct devs_id *dev) -{ - struct platform_device *pdev; - void *pdata = NULL; - static struct resource res __initdata = { - .name = "IRQ", - .flags = IORESOURCE_IRQ, - }; - - pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", - pentry->name, pentry->irq); - - /* - * We need to call platform init of IPC devices to fill misc_pdata - * structure. It will be used in msic_init for initialization. - */ - if (dev != NULL) - pdata = dev->get_platform_data(pentry); - - /* - * On Medfield the platform device creation is handled by the MSIC - * MFD driver so we don't need to do it here. - */ - if (intel_mid_has_msic()) - return; - - pdev = platform_device_alloc(pentry->name, 0); - if (pdev == NULL) { - pr_err("out of memory for SFI platform device '%s'.\n", - pentry->name); - return; - } - res.start = pentry->irq; - platform_device_add_resources(pdev, &res, 1); - - pdev->dev.platform_data = pdata; - intel_scu_device_register(pdev); -} diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h deleted file mode 100644 index 79bb09d4f718..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * platform_ipc.h: IPC platform library header file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#ifndef _PLATFORM_IPC_H_ -#define _PLATFORM_IPC_H_ - -void __init -ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); - -#endif diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c index cb3490ecb341..d4dc744dd5a5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void *msic_audio_platform_data(void *info) { @@ -40,8 +39,8 @@ static const struct devs_id msic_audio_dev_id __initconst = { .name = "msic_audio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_audio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c index 4f72193939a6..5c3e9919633f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c @@ -19,7 +19,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_battery_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_battery_dev_id __initconst = { .name = "msic_battery", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_battery_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_battery_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c index 70de5b531ba0..9fdb88d460d7 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_gpio_platform_data(void *info) { @@ -41,8 +40,8 @@ static const struct devs_id msic_gpio_dev_id __initconst = { .name = "msic_gpio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_gpio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_gpio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c index 3d7c2011b6cf..7ae37cdbf256 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_ocd_platform_data(void *info) { @@ -42,8 +41,8 @@ static const struct devs_id msic_ocd_dev_id __initconst = { .name = "msic_ocd", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_ocd_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_ocd_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c index 038f618fbc52..96809b98cf69 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c @@ -18,7 +18,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_power_btn_platform_data(void *info) { @@ -29,8 +28,8 @@ static const struct devs_id msic_power_btn_dev_id __initconst = { .name = "msic_power_btn", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_power_btn_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_power_btn_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c index 114a5755b1e4..3e4167d246cd 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c @@ -19,7 +19,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_thermal_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_thermal_dev_id __initconst = { .name = "msic_thermal", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_thermal_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_thermal_dev_id); diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 051d264fce2e..e8f68f652087 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -335,10 +335,22 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", pentry->name, pentry->irq); + + /* + * We need to call platform init of IPC devices to fill misc_pdata + * structure. It will be used in msic_init for initialization. + */ pdata = intel_mid_sfi_get_pdata(dev, pentry); if (IS_ERR(pdata)) return; + /* + * On Medfield the platform device creation is handled by the MSIC + * MFD driver so we don't need to do it here. + */ + if (dev->msic && intel_mid_has_msic()) + return; + pdev = platform_device_alloc(pentry->name, 0); if (pdev == NULL) { pr_err("out of memory for SFI platform device '%s'.\n", @@ -348,7 +360,10 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, install_irq_resource(pdev, pentry->irq); pdev->dev.platform_data = pdata; - platform_device_add(pdev); + if (dev->delay) + intel_scu_device_register(pdev); + else + platform_device_add(pdev); } static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, @@ -503,27 +518,23 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) if (!dev) continue; - if (dev->device_handler) { - dev->device_handler(pentry, dev); - } else { - switch (pentry->type) { - case SFI_DEV_TYPE_IPC: - sfi_handle_ipc_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SPI: - sfi_handle_spi_dev(pentry, dev); - break; - case SFI_DEV_TYPE_I2C: - sfi_handle_i2c_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SD: - sfi_handle_sd_dev(pentry, dev); - break; - case SFI_DEV_TYPE_UART: - case SFI_DEV_TYPE_HSI: - default: - break; - } + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + sfi_handle_ipc_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SPI: + sfi_handle_spi_dev(pentry, dev); + break; + case SFI_DEV_TYPE_I2C: + sfi_handle_i2c_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + break; } } return 0; -- cgit v1.2.3 From ecc7ea5dd1409d4e6dfba2f0ff0ee1c6ccd855bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Jan 2017 18:17:17 +0200 Subject: x86/platform/intel-mid: Enable GPIO keys on Merrifield The Merrifield firmware provides 3 descriptions of buttons connected to GPIO. Append them to the list of supported GPIO keys. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105161717.115261-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index 52534ec29765..74283875c7e8 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -32,6 +32,9 @@ static struct gpio_keys_button gpio_button[] = { {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_MUTE, -1, 1, "mute_enable", EV_KEY, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "volume_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "volume_down", EV_KEY, 0, 20}, {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, -- cgit v1.2.3 From 1e620f9b23e598ab936ece12233e98e97930b692 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 8 Dec 2016 11:44:31 -0500 Subject: x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C The new Xen PVH entry point requires page tables to be setup by the kernel since it is entered with paging disabled. Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be invoked from both the new Xen entry point and the existing startup_32() code. Convert resulting common code to C. Signed-off-by: Boris Ostrovsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: matt@codeblueprint.co.uk Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32.h | 32 ++++++++++ arch/x86/kernel/head32.c | 62 +++++++++++++++++++ arch/x86/kernel/head_32.S | 121 +++----------------------------------- 3 files changed, 101 insertions(+), 114 deletions(-) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b6c0b404898a..fbc73360aea0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -27,6 +27,7 @@ struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } @@ -75,4 +76,35 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif +/* + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. + * We need: + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f16c55bfc090..e5fb436a6548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void) start_kernel(); } + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. Note the upper half of each PMD or PTE are + * always zero at this stage. + */ +void __init mk_early_pgtbl_32(void) +{ +#ifdef __pa +#undef __pa +#endif +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + pte_t pte, *ptep; + int i; + unsigned long *ptr; + /* Enough space to fit pagetables for the low memory linear map */ + const unsigned long limit = __pa(_end) + + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); +#ifdef CONFIG_X86_PAE + pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); +#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +#else + pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); +#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +#endif + + ptep = (pte_t *)__pa(__brk_base); + pte.pte = PTE_IDENT_ATTR; + + while ((pte.pte & PTE_PFN_MASK) < limit) { + + SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); + *pl2p = pl2; +#ifndef CONFIG_X86_PAE + /* Kernel PDE entry */ + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; +#endif + for (i = 0; i < PTRS_PER_PTE; i++) { + *ptep = pte; + pte.pte += PAGE_SIZE; + ptep++; + } + + pl2p++; + } + + ptr = (unsigned long *)__pa(&max_pfn_mapped); + /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ + *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + + ptr = (unsigned long *)__pa(&_brk_end); + *ptr = (unsigned long)ptep + PAGE_OFFSET; +} + diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 4e8577d03372..1f85ee8f9439 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -24,6 +24,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -41,43 +42,9 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif #define SIZEOF_PTREGS 17*4 -/* - * Number of possible pages in the lowmem region. - * - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a - * gas warning about overflowing shift count when gas has been compiled - * with only a host target support using a 32-bit type for internal - * representation. - */ -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - /* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able @@ -160,90 +127,15 @@ ENTRY(startup_32) call load_ucode_bsp #endif -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) + /* Create early pagetables. */ + call mk_early_pgtbl_32 /* Do early initialization of the fixmap area */ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#ifdef CONFIG_X86_PAE +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#else movl %eax,pa(initial_page_table+0xffc) #endif @@ -666,6 +558,7 @@ ENTRY(setup_once_ref) __PAGE_ALIGNED_BSS .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.globl initial_pg_pmd initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -- cgit v1.2.3 From 12bf98b91f7aa8a9a526309aba645ccdcc470cab Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 5 Jan 2017 17:54:43 +0800 Subject: x86/apic: Fix typos in comments s/ID/IDs/ s/inr_logical_cpuidi/nr_logical_cpuids/ s/generic_processor_info()/__generic_processor_info()/ Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1483610083-24314-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..5c4fdcfda109 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2028,8 +2028,8 @@ void disconnect_bsp_APIC(int virt_wire_setup) /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of - * nr_logical_cpuids is nr_cpu_ids. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. * * NOTE: Reserve 0 for BSP. */ @@ -2094,7 +2094,7 @@ int __generic_processor_info(int apicid, int version, bool enabled) * Since fixing handling of boot_cpu_physical_apicid requires * another discussion and tests on each platform, we leave it * for now and here we use read_apic_id() directly in this - * function, generic_processor_info(). + * function, __generic_processor_info(). */ if (disabled_cpu_apicid != BAD_APICID && disabled_cpu_apicid != read_apic_id() && -- cgit v1.2.3 From 40d9f82750044f846005d2ac4eec65e39c1c0f7c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Dec 2016 14:33:23 -0800 Subject: timekeeping: Remove unused timekeeping_{get,set}_tai_offset() The last caller to timekeeping_set_tai_offset() was in commit 0b5154fb9040 (timekeeping: Simplify tai updating from do_adjtimex, 2013-03-22) and the last caller to timekeeping_get_tai_offset() was in commit 76f4108892d9 (hrtimer: Cleanup hrtimer accessors to the timekepeing state, 2014-07-16). Remove these unused functions now that we handle TAI offsets differently. Cc: John Stultz Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 39 +-------------------------------------- kernel/time/timekeeping.h | 2 -- 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index db087d7e106d..95b258dd75db 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1275,27 +1275,8 @@ error: /* even if we error out, we forwarded the time, so call update */ } EXPORT_SYMBOL(timekeeping_inject_offset); - -/** - * timekeeping_get_tai_offset - Returns current TAI offset from UTC - * - */ -s32 timekeeping_get_tai_offset(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; - s32 ret; - - do { - seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tai_offset; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - return ret; -} - /** - * __timekeeping_set_tai_offset - Lock free worker function + * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic * */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) @@ -1304,24 +1285,6 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); } -/** - * timekeeping_set_tai_offset - Sets the current TAI offset from UTC - * - */ -void timekeeping_set_tai_offset(s32 tai_offset) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned long flags; - - raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&tk_core.seq); - __timekeeping_set_tai_offset(tk, tai_offset); - timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&tk_core.seq); - raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clock_was_set(); -} - /** * change_clocksource - Swaps clocksources if a new one is available * diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 704f595ce83f..d0914676d4c5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -11,8 +11,6 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); -extern s32 timekeeping_get_tai_offset(void); -extern void timekeeping_set_tai_offset(s32 tai_offset); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); -- cgit v1.2.3 From 914122c389d091a02f7b5476209af715e77ccb73 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Dec 2016 17:45:49 +0100 Subject: x86/apic: Implement set_state_oneshot_stopped() callback When clock_event_device::set_state_oneshot_stopped() is not implemented, hrtimer_cancel() can't stop the clock when there is no more timer in the queue. So the ghost of the freshly cancelled hrtimer haunts us back later with an extra interrupt: -0 [002] d..2 2248.557659: hrtimer_cancel: hrtimer=ffff88021fa92d80 -0 [002] d.h1 2249.303659: local_timer_entry: vector=239 So let's implement this missing callback for the lapic clock. This consist in calling its set_state_shutdown() callback. There don't seem to be a lighter way to stop the clock. Simply writing 0 to APIC_TMICT won't be enough to stop the clock and avoid the extra interrupt, as opposed to what is specified in the specs. We must also mask the timer interrupt in the device. Signed-off-by: Frederic Weisbecker Cc: Borislav Petkov Reviewed-by: Wanpeng Li Reviewed-by: Viresh Kumar Link: http://lkml.kernel.org/r/1483029949-6925-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5c4fdcfda109..fdb9c46227cc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -529,18 +529,19 @@ static void lapic_timer_broadcast(const struct cpumask *mask) * The local apic timer can be used for any function which is CPU local. */ static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP - | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_state_shutdown = lapic_timer_shutdown, - .set_state_periodic = lapic_timer_set_periodic, - .set_state_oneshot = lapic_timer_set_oneshot, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP + | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_state_shutdown = lapic_timer_shutdown, + .set_state_periodic = lapic_timer_set_periodic, + .set_state_oneshot = lapic_timer_set_oneshot, + .set_state_oneshot_stopped = lapic_timer_shutdown, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -- cgit v1.2.3 From f1be6cdaf57ce918828b6cff6ff2b4ea87be7f62 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 7 Jan 2017 14:34:57 +0200 Subject: x86/platform/intel-mid: Make intel_scu_device_register() static There is no need anymore to have intel_scu_device_register() exported. Annotate it with static keyword. While here, rename to intel_scu_ipc_device_register() to use same pattern for all SFI enumerated device register helpers. Signed-off-by: Andy Shevchenko Link: http://lkml.kernel.org/r/20170107123457.53033-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel-mid.h | 1 - arch/x86/platform/intel-mid/sfi.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 91ead0cefa76..fe04491130ae 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -27,7 +27,6 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); extern int get_gpio_by_name(const char *name); -extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); extern int __init sfi_parse_mtmr(struct sfi_table_header *table); extern int sfi_mrtc_num; diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index e8f68f652087..ce1303830231 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -226,7 +226,7 @@ int get_gpio_by_name(const char *name) return -EINVAL; } -void __init intel_scu_device_register(struct platform_device *pdev) +static void __init intel_scu_ipc_device_register(struct platform_device *pdev) { if (ipc_next_dev == MAX_IPCDEVS) pr_err("too many SCU IPC devices"); @@ -361,7 +361,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pdev->dev.platform_data = pdata; if (dev->delay) - intel_scu_device_register(pdev); + intel_scu_ipc_device_register(pdev); else platform_device_add(pdev); } -- cgit v1.2.3 From cd7f355ac4067177e0a45e7331d11472bd7fd7ca Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 6 Jan 2017 14:18:08 -0800 Subject: perf jvmti: Create libdir directory before installing libperf-jvmti.so The install command for libperf-jvmti.so does not check if libdir exists before installing. This means that when the install command is run: install libperf-jvmti.so '/tmp/test_root/usr/lib64'; libperf-jvmti.so will get installed to /usr/lib64 as a file and break further installation. Fix this by ensuring the directory gets created first. See https://bugzilla.redhat.com/show_bug.cgi?id=1410296 Signed-off-by: Laura Abbott Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: d4dfdf00d43e ("perf jvmti: Plug compilation into perf build") Link: http://lkml.kernel.org/r/1483741088-13543-1-git-send-email-labbott@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8bb16aa9d661..4da19b6ba94a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -661,6 +661,7 @@ ifndef NO_PERF_READ_VDSOX32 endif ifndef NO_JVMTI $(call QUIET_INSTALL, $(LIBJVMTI)) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ $(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)'; endif $(call QUIET_INSTALL, libexec) \ -- cgit v1.2.3 From d5f805c09620dadc1a7806fbd46189d183f6c395 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 12:44:05 -0300 Subject: tools lib subcmd: Add missing linux/kernel.h include to subcmd.h As it was getting the BUILD_BUG_ON_ZERO() definition by luck. Cc: Jiri Olsa Cc: Wang Nan Cc: Josh Poimboeuf Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-dh71o31ar72ajck8o2x4aoae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 11c3be3bcce7..37e2d1a6fc2a 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -1,6 +1,7 @@ #ifndef __SUBCMD_PARSE_OPTIONS_H #define __SUBCMD_PARSE_OPTIONS_H +#include #include #include -- cgit v1.2.3 From 7d132caaf9392853ad637c8e6e53333cbeb99aa5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 15:31:48 -0300 Subject: perf machine: Add a kallsyms loading constructor To reduce the boilerplate for searching for functions in the running kernel and modules. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-93iqzayafpaxaguoiwjqezgz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 19 +++++++++++++++++++ tools/perf/util/machine.h | 1 + 2 files changed, 20 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9b33bef54581..747a034d1ff3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -87,6 +87,25 @@ out_delete: return NULL; } +struct machine *machine__new_kallsyms(void) +{ + struct machine *machine = machine__new_host(); + /* + * FIXME: + * 1) MAP__FUNCTION will go away when we stop loading separate maps for + * functions and data objects. + * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely + * ask for not using the kcore parsing code, once this one is fixed + * to create a map per module. + */ + if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + machine__delete(machine); + machine = NULL; + } + + return machine; +} + static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 354de6e56109..a28305029711 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -129,6 +129,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); +struct machine *machine__new_kallsyms(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); -- cgit v1.2.3 From 355637717d575f14169954c3ed31536d45778f08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 15:33:32 -0300 Subject: perf kallsyms: Introduce tool to look for extended symbol information on the running kernel Its similar to doing grep on a /proc/kallsyms, but it also shows extra information like the path to the kernel module and the unrelocated addresses in it, to help in diagnosing problems. It is also helps demonstrate the use of the symbols routines so that tool writers can use them more effectively. Using it: $ perf kallsyms e1000_xmit_frame netif_rx usb_stor_set_xfer_buf e1000_xmit_frame: [e1000e] /lib/modules/4.9.0+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko 0xffffffffc046fc10-0xffffffffc0470bb0 (0x19c80-0x1ac20) netif_rx: [kernel] [kernel.kallsyms] 0xffffffff916f03a0-0xffffffff916f0410 (0xffffffff916f03a0-0xffffffff916f0410) usb_stor_set_xfer_buf: [usb_storage] /lib/modules/4.9.0+/kernel/drivers/usb/storage/usb-storage.ko 0xffffffffc057aea0-0xffffffffc057af19 (0xf10-0xf89) $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-79bk9pakujn4l4vq0f90klv3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 1 + tools/perf/Documentation/perf-kallsyms.txt | 24 +++++++++++ tools/perf/builtin-help.c | 2 +- tools/perf/builtin-kallsyms.c | 67 ++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 7 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-kallsyms.txt create mode 100644 tools/perf/builtin-kallsyms.c diff --git a/tools/perf/Build b/tools/perf/Build index b12d5d1666e3..0b48806f93c2 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -7,6 +7,7 @@ perf-y += builtin-help.o perf-y += builtin-sched.o perf-y += builtin-buildid-list.o perf-y += builtin-buildid-cache.o +perf-y += builtin-kallsyms.o perf-y += builtin-list.o perf-y += builtin-record.o perf-y += builtin-report.o diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt new file mode 100644 index 000000000000..954ea9e21236 --- /dev/null +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -0,0 +1,24 @@ +perf-kallsyms(1) +============== + +NAME +---- +perf-kallsyms - Searches running kernel for symbols + +SYNOPSIS +-------- +[verse] +'perf kallsyms symbol_name[,symbol_name...]' + +DESCRIPTION +----------- +This command searches the running kernel kallsyms file for the given symbol(s) +and prints information about it, including the DSO, the kallsyms begin/end +addresses and the addresses in the ELF kallsyms symbol table (for symbols in +modules). + +OPTIONS +------- +-v:: +--verbose=:: + Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 3bdb2c78a21b..93da24a638be 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -434,7 +434,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) const char * const builtin_help_subcommands[] = { "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", "record", "report", "bench", "stat", "timechart", "top", "annotate", - "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", + "script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data", #ifdef HAVE_LIBELF_SUPPORT "probe", #endif diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c new file mode 100644 index 000000000000..224bfc454b4a --- /dev/null +++ b/tools/perf/builtin-kallsyms.c @@ -0,0 +1,67 @@ +/* + * builtin-kallsyms.c + * + * Builtin command: Look for a symbol in the running kernel and its modules + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ +#include "builtin.h" +#include +#include +#include "debug.h" +#include "machine.h" +#include "symbol.h" + +static int __cmd_kallsyms(int argc, const char **argv) +{ + int i; + struct machine *machine = machine__new_kallsyms(); + + if (machine == NULL) { + pr_err("Couldn't read /proc/kallsyms\n"); + return -1; + } + + for (i = 0; i < argc; ++i) { + struct map *map; + struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); + + if (symbol == NULL) { + printf("%s: not found\n", argv[i]); + continue; + } + + printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n", + symbol->name, map->dso->short_name, map->dso->long_name, + map->unmap_ip(map, symbol->start), map->unmap_ip(map, symbol->end), + symbol->start, symbol->end); + } + + machine__delete(machine); + return 0; +} + +int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + const char * const kallsyms_usage[] = { + "perf kallsyms [] symbol_name", + NULL + }; + + argc = parse_options(argc, argv, options, kallsyms_usage, 0); + if (argc < 1) + usage_with_options(kallsyms_usage, options); + + symbol_conf.sort_by_name = true; + symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); + if (symbol__init(NULL) < 0) + return -1; + + return __cmd_kallsyms(argc, argv); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 0bcf68e98ccc..b55f5be486a1 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -23,6 +23,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix); int cmd_evlist(int argc, const char **argv, const char *prefix); int cmd_help(int argc, const char **argv, const char *prefix); int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_kallsyms(int argc, const char **argv, const char *prefix); int cmd_list(int argc, const char **argv, const char *prefix); int cmd_record(int argc, const char **argv, const char *prefix); int cmd_report(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ab5cbaa170d0..fb45613dba9e 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -12,6 +12,7 @@ perf-diff mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common +perf-kallsyms mainporcelain common perf-kmem mainporcelain common perf-kvm mainporcelain common perf-list mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aa23b3347d6b..13c8a7db055e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -47,6 +47,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, -- cgit v1.2.3 From 017037ff3d0b11842012878a546fe2df47822259 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Jan 2017 17:26:26 -0300 Subject: perf trace: Allow specifying list of syscalls and events in -e/--expr/--event Makes it easier to specify both events and syscalls (to be formatter strace-like), i.e. previously one would have to do: # perf trace -e nanosleep --event sched:sched_switch usleep 1 Now it is possible to do: # perf trace -e nanosleep,sched:sched_switch usleep 1 0.000 ( 0.021 ms): usleep/17962 nanosleep(rqtp: 0x7ffdedd61ec0) ... 0.021 ( ): sched:sched_switch:usleep:17962 [120] S ==> swapper/1:0 [120]) 0.000 ( 0.066 ms): usleep/17962 ... [continued]: nanosleep()) = 0 # The old style --expr and using both -e and --event continues to work. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ieg6bakub4657l9e6afn85r4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 8 +-- tools/perf/builtin-trace.c | 120 ++++++++++++++++++++++++-------- 2 files changed, 96 insertions(+), 32 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 781b019751a4..afd728672b6f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -35,7 +35,10 @@ OPTIONS -e:: --expr:: - List of syscalls to show, currently only syscall names. +--event:: + List of syscalls and other perf events (tracepoints, HW cache events, + etc) to show. + See 'perf list' for a complete list of events. Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. @@ -135,9 +138,6 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. ---event:: - Trace other events, see 'perf list' for a complete list. - --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 206bf72b77fc..40ef9b293d1b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -40,6 +40,7 @@ #include /* FIXME: Still needed for audit_errno_to_name */ #include +#include #include #include #include @@ -2699,6 +2700,91 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) evsel->handler = handler; } +/* + * XXX: Hackish, just splitting the combined -e+--event (syscalls + * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use + * existing facilities unchanged (trace->ev_qualifier + parse_options()). + * + * It'd be better to introduce a parse_options() variant that would return a + * list with the terms it didn't match to an event... + */ +static int trace__parse_events_option(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct trace *trace = (struct trace *)opt->value; + const char *s = str; + char *sep = NULL, *lists[2] = { NULL, NULL, }; + int len = strlen(str), err = -1, list; + char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); + char group_name[PATH_MAX]; + + if (strace_groups_dir == NULL) + return -1; + + if (*s == '!') { + ++s; + trace->not_ev_qualifier = true; + } + + while (1) { + if ((sep = strchr(s, ',')) != NULL) + *sep = '\0'; + + list = 0; + if (syscalltbl__id(trace->sctbl, s) >= 0) { + list = 1; + } else { + path__join(group_name, sizeof(group_name), strace_groups_dir, s); + if (access(group_name, R_OK) == 0) + list = 1; + } + + if (lists[list]) { + sprintf(lists[list] + strlen(lists[list]), ",%s", s); + } else { + lists[list] = malloc(len); + if (lists[list] == NULL) + goto out; + strcpy(lists[list], s); + } + + if (!sep) + break; + + *sep = ','; + s = sep + 1; + } + + if (lists[1] != NULL) { + struct strlist_config slist_config = { + .dirname = strace_groups_dir, + }; + + trace->ev_qualifier = strlist__new(lists[1], &slist_config); + if (trace->ev_qualifier == NULL) { + fputs("Not enough memory to parse event qualifier", trace->output); + goto out; + } + + if (trace__validate_ev_qualifier(trace)) + goto out; + } + + err = 0; + + if (lists[0]) { + struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option); + err = parse_events_option(&o, lists[0], 0); + } +out: + if (sep) + *sep = ','; + + return err; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char *trace_usage[] = { @@ -2730,15 +2816,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .max_stack = UINT_MAX, }; const char *output_name = NULL; - const char *ev_qualifier_str = NULL; const struct option trace_options[] = { - OPT_CALLBACK(0, "event", &trace.evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option), + OPT_CALLBACK('e', "event", &trace, "event", + "event/syscall selector. use 'perf list' to list available events", + trace__parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), - OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), + OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", + trace__parse_events_option), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", @@ -2863,7 +2949,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - if (!trace.trace_syscalls && ev_qualifier_str) { + if (!trace.trace_syscalls && trace.ev_qualifier) { pr_err("The -e option can't be used with --no-syscalls.\n"); goto out; } @@ -2878,28 +2964,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace.open_id = syscalltbl__id(trace.sctbl, "open"); - if (ev_qualifier_str != NULL) { - const char *s = ev_qualifier_str; - struct strlist_config slist_config = { - .dirname = system_path(STRACE_GROUPS_DIR), - }; - - trace.not_ev_qualifier = *s == '!'; - if (trace.not_ev_qualifier) - ++s; - trace.ev_qualifier = strlist__new(s, &slist_config); - if (trace.ev_qualifier == NULL) { - fputs("Not enough memory to parse event qualifier", - trace.output); - err = -ENOMEM; - goto out_close; - } - - err = trace__validate_ev_qualifier(&trace); - if (err) - goto out_close; - } - err = target__validate(&trace.opts.target); if (err) { target__strerror(&trace.opts.target, err, bf, sizeof(bf)); -- cgit v1.2.3 From e978be9ea2990f1af60fe10eadd2312a6250e0b8 Mon Sep 17 00:00:00 2001 From: Soramichi Akiyama Date: Tue, 10 Jan 2017 10:41:00 -0300 Subject: perf evlist: Fix typo in perf_evlist__start_workload() This patch fixes a typo: s/enable to/unable to/ Signed-off-by: Soramichi AKIYAMA Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: bcf3145fbeb1 ("perf evlist: Enhance perf_evlist__start_workload()") Link: http://lkml.kernel.org/r/20170110200006.e1f7a766b4faf1f107ae2e1b@m.soramichi.jp [ Wasn't applying, fixed it up by hand, added Fixes: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d92e02006fb8..23e6f33edcf2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1797,7 +1797,7 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) */ ret = write(evlist->workload.cork_fd, &bf, 1); if (ret < 0) - perror("enable to write to pipe"); + perror("unable to write to pipe"); close(evlist->workload.cork_fd); return ret; -- cgit v1.2.3 From 9808143ba2e54818a3cf445d9b69b3f5f15451ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:55 +0100 Subject: perf tools: Add unit_number__scnprintf function Add unit_number__scnprintf function to display size units and use it in -m option info message. Before: $ perf record -m 10M ls rounding mmap pages size to 16777216 bytes (4096 pages) ... After: $ perf record -m 10M ls rounding mmap pages size to 16M (4096 pages) ... Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1483955520-29063-2-git-send-email-jolsa@kernel.org [ Rename it to unit_number__scnprintf for consistency ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/unit_number__scnprintf.c | 37 +++++++++++++++++++++++++++++++ tools/perf/util/evlist.c | 8 +++++-- tools/perf/util/util.c | 13 +++++++++++ tools/perf/util/util.h | 1 + 7 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tools/perf/tests/unit_number__scnprintf.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 6676c2dd6dcb..1cb3d9b540e9 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -44,6 +44,7 @@ perf-y += is_printable_array.o perf-y += bitmap.o perf-y += perf-hooks.o perf-y += clang.o +perf-y += unit_number__scnprintf.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a77dcc0d24e3..37e326bfd2dc 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -246,6 +246,10 @@ static struct test generic_tests[] = { .get_desc = test__clang_subtest_get_desc, } }, + { + .desc = "unit_number__scnprintf", + .func = test__unit_number__scnprint, + }, { .func = NULL, }, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index a512f0c8ff5b..1fa9b9d83aa5 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -96,6 +96,7 @@ int test__perf_hooks(int subtest); int test__clang(int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); +int test__unit_number__scnprint(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c new file mode 100644 index 000000000000..623c2aa53c4a --- /dev/null +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -0,0 +1,37 @@ +#include +#include +#include "tests.h" +#include "util.h" +#include "debug.h" + +int test__unit_number__scnprint(int subtest __maybe_unused) +{ + struct { + u64 n; + const char *str; + } test[] = { + { 1, "1B" }, + { 10*1024, "10K" }, + { 20*1024*1024, "20M" }, + { 30*1024*1024*1024ULL, "30G" }, + { 0, "0B" }, + { 0, NULL }, + }; + unsigned i = 0; + + while (test[i].str) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), test[i].n); + + pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n", + test[i].n, test[i].str, buf); + + if (strcmp(test[i].str, buf)) + return TEST_FAIL; + + i++; + } + + return TEST_OK; +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 23e6f33edcf2..dc4df3d2660e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1224,12 +1224,16 @@ static long parse_pages_arg(const char *str, unsigned long min, if (pages == 0 && min == 0) { /* leave number of pages at 0 */ } else if (!is_power_of_2(pages)) { + char buf[100]; + /* round pages up to next power of 2 */ pages = roundup_pow_of_two(pages); if (!pages) return -EINVAL; - pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", - pages * page_size, pages); + + unit_number__scnprintf(buf, sizeof(buf), pages * page_size); + pr_info("rounding mmap pages size to %s (%lu pages)\n", + buf, pages); } if (pages > max) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9ddd98827d12..bf29aed16bd6 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -789,3 +789,16 @@ int is_printable_array(char *p, unsigned int len) } return 1; } + +int unit_number__scnprintf(char *buf, size_t size, u64 n) +{ + char unit[4] = "BKMG"; + int i = 0; + + while (((n / 1024) > 1) && (i < 3)) { + n /= 1024; + i++; + } + + return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1d639e38aa82..6e8be174ec0b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -363,4 +363,5 @@ int is_printable_array(char *p, unsigned int len); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); +int unit_number__scnprintf(char *buf, size_t size, u64 n); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 1b43b70484a9617de5fe2c12e64bea006010ac1c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:56 +0100 Subject: perf record: Add struct switch_output Next patches will add more --switch-output option arguments, so preparing the data holder. Signed-off-by: Jiri Olsa Acked-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ec10e9427d9..f7e805b30527 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,10 @@ #include #include +struct switch_output { + bool signal; +}; + struct record { struct perf_tool tool; struct record_opts opts; @@ -62,7 +66,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; - bool switch_output; + struct switch_output switch_output; unsigned long long samples; }; @@ -842,11 +846,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); - if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.signal) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) trigger_on(&auxtrace_snapshot_trigger); - if (rec->switch_output) + if (rec->switch_output.signal) trigger_on(&switch_output_trigger); } else { signal(SIGUSR2, SIG_IGN); @@ -1519,7 +1523,7 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), - OPT_BOOLEAN(0, "switch-output", &record.switch_output, + OPT_BOOLEAN(0, "switch-output", &record.switch_output.signal, "Switch output when receive SIGUSR2"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), @@ -1578,7 +1582,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } - if (rec->switch_output) + if (rec->switch_output.signal) rec->timestamp_filename = true; if (!rec->itr) { @@ -1629,7 +1633,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); - } else if (rec->switch_output) { + } else if (rec->switch_output.signal) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching -- cgit v1.2.3 From cb4e1ebb6a398ff5b0067034b0d16566af4d78e8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:57 +0100 Subject: perf record: Change switch-output option to take optional argument Next patches will add --switch-output option arguments, changing the option to allow that and adding its default value to 'signal'. Signed-off-by: Jiri Olsa Acked-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f7e805b30527..2bf811acaf8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -48,6 +48,8 @@ struct switch_output { bool signal; + const char *str; + bool set; }; struct record { @@ -1356,6 +1358,22 @@ out_free: return ret; } +static int switch_output_setup(struct record *rec) +{ + struct switch_output *s = &rec->switch_output; + + if (!s->set) + return 0; + + if (!strcmp(s->str, "signal")) { + s->signal = true; + pr_debug("switch-output with SIGUSR2 signal\n"); + return 0; + } + + return -1; +} + static const char * const __record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -1523,8 +1541,9 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), - OPT_BOOLEAN(0, "switch-output", &record.switch_output.signal, - "Switch output when receive SIGUSR2"), + OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, + &record.switch_output.set, "signal", + "Switch output when receive SIGUSR2", "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), OPT_END() @@ -1582,6 +1601,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } + if (switch_output_setup(rec)) { + parse_options_usage(record_usage, record_options, "switch-output", 0); + return -EINVAL; + } + if (rec->switch_output.signal) rec->timestamp_filename = true; -- cgit v1.2.3 From dc0c6127c231d4d264570497a916fa19740c915b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:58 +0100 Subject: perf record: Add switch-output size option argument It's now possible to specify the threshold size for perf.data like: $ perf record --switch-output=2G ... Once it's reached, the current data are dumped in to the perf.data. file and session does on. $ perf record --switch-output=2G ... [ perf record: dump data: Woken up 7244 times ] [ perf record: Dump perf.data.2017010214093746 ] ... The size is expected to be a number with appended unit character - B/K/M/G. Signed-off-by: Jiri Olsa Acked-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 12 +++++- tools/perf/builtin-record.c | 67 +++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5054d9147f0f..3d55d2fd48b3 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -421,9 +421,17 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. ---switch-output:: +--switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one -when receiving a SIGUSR2. +based on 'mode' value: + "signal" - when receiving a SIGUSR2 (default value) or + - when reaching the size threshold, size is expected to + be a number with appended unit character - B/K/M/G + + Note: the precision of the size threshold hugely depends + on your configuration - the number and size of your ring + buffers (-m). It is generally more precise for higher sizes + (like >5M), for lower values expect different sizes. A possible use case is to, given an external event, slice the perf.data file that gets then processed, possibly via a perf script, to decide if that diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2bf811acaf8d..3fa64492ee62 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -47,7 +47,9 @@ #include struct switch_output { + bool enabled; bool signal; + unsigned long size; const char *str; bool set; }; @@ -72,6 +74,23 @@ struct record { unsigned long long samples; }; +static volatile int auxtrace_record__snapshot_started; +static DEFINE_TRIGGER(auxtrace_snapshot_trigger); +static DEFINE_TRIGGER(switch_output_trigger); + +static bool switch_output_signal(struct record *rec) +{ + return rec->switch_output.signal && + trigger_is_ready(&switch_output_trigger); +} + +static bool switch_output_size(struct record *rec) +{ + return rec->switch_output.size && + trigger_is_ready(&switch_output_trigger) && + (rec->bytes_written >= rec->switch_output.size); +} + static int record__write(struct record *rec, void *bf, size_t size) { if (perf_data_file__write(rec->session->file, bf, size) < 0) { @@ -80,6 +99,10 @@ static int record__write(struct record *rec, void *bf, size_t size) } rec->bytes_written += size; + + if (switch_output_size(rec)) + trigger_hit(&switch_output_trigger); + return 0; } @@ -199,10 +222,6 @@ static volatile int done; static volatile int signr = -1; static volatile int child_finished; -static volatile int auxtrace_record__snapshot_started; -static DEFINE_TRIGGER(auxtrace_snapshot_trigger); -static DEFINE_TRIGGER(switch_output_trigger); - static void sig_handler(int sig) { if (sig == SIGCHLD) @@ -848,11 +867,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); - if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.signal) { + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) trigger_on(&auxtrace_snapshot_trigger); - if (rec->switch_output.signal) + if (rec->switch_output.enabled) trigger_on(&switch_output_trigger); } else { signal(SIGUSR2, SIG_IGN); @@ -1361,6 +1380,14 @@ out_free: static int switch_output_setup(struct record *rec) { struct switch_output *s = &rec->switch_output; + static struct parse_tag tags_size[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + unsigned long val; if (!s->set) return 0; @@ -1368,10 +1395,22 @@ static int switch_output_setup(struct record *rec) if (!strcmp(s->str, "signal")) { s->signal = true; pr_debug("switch-output with SIGUSR2 signal\n"); - return 0; + goto enabled; + } + + val = parse_tag_value(s->str, tags_size); + if (val != (unsigned long) -1) { + s->size = val; + pr_debug("switch-output with %s size threshold\n", s->str); + goto enabled; } return -1; + +enabled: + rec->timestamp_filename = true; + s->enabled = true; + return 0; } static const char * const __record_usage[] = { @@ -1542,8 +1581,9 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, - &record.switch_output.set, "signal", - "Switch output when receive SIGUSR2", "signal"), + &record.switch_output.set, "signal,size", + "Switch output when receive SIGUSR2 or cross size threshold", + "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), OPT_END() @@ -1606,9 +1646,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } - if (rec->switch_output.signal) - rec->timestamp_filename = true; - if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) @@ -1657,7 +1694,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); - } else if (rec->switch_output.signal) { + } else if (rec->switch_output.enabled) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching @@ -1749,6 +1786,8 @@ out: static void snapshot_sig_handler(int sig __maybe_unused) { + struct record *rec = &record; + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { trigger_hit(&auxtrace_snapshot_trigger); auxtrace_record__snapshot_started = 1; @@ -1756,6 +1795,6 @@ static void snapshot_sig_handler(int sig __maybe_unused) trigger_error(&auxtrace_snapshot_trigger); } - if (trigger_is_ready(&switch_output_trigger)) + if (switch_output_signal(rec)) trigger_hit(&switch_output_trigger); } -- cgit v1.2.3 From 0c5824498e8bd5b7d30dc03448cd89efaee4bead Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:59 +0100 Subject: perf record: Add switch-output size warning Adding switch-output size warning if the requested size of lower than the wakeup ring buffer size. $ perf record --switch-output=1K ls WARNING: switch-output data size lower than wakeup kernel buffer size (258K) expect bigger perf.data sizes ... Signed-off-by: Jiri Olsa Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1483955520-29063-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 21 +++++++++++++++++++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3fa64492ee62..93319e1be3ac 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1377,6 +1377,23 @@ out_free: return ret; } +static void switch_output_size_warn(struct record *rec) +{ + u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); + struct switch_output *s = &rec->switch_output; + + wakeup_size /= 2; + + if (s->size < wakeup_size) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), wakeup_size); + pr_warning("WARNING: switch-output data size lower than " + "wakeup kernel buffer size (%s) " + "expect bigger perf.data sizes\n", buf); + } +} + static int switch_output_setup(struct record *rec) { struct switch_output *s = &rec->switch_output; @@ -1410,6 +1427,10 @@ static int switch_output_setup(struct record *rec) enabled: rec->timestamp_filename = true; s->enabled = true; + + if (s->size && !rec->opts.no_buffering) + switch_output_size_warn(rec); + return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dc4df3d2660e..b601f2814a30 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1184,7 +1184,7 @@ unsigned long perf_event_mlock_kb_in_pages(void) return pages; } -static size_t perf_evlist__mmap_size(unsigned long pages) +size_t perf_evlist__mmap_size(unsigned long pages) { if (pages == UINT_MAX) pages = perf_event_mlock_kb_in_pages(); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4fd034f22d2f..389b9ccdf8c7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -218,6 +218,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); +size_t perf_evlist__mmap_size(unsigned long pages); + void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); void perf_evlist__toggle_enable(struct perf_evlist *evlist); -- cgit v1.2.3 From bfacbe3bf2443c805aec4c04ecb558d03d0d3ebc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:52:00 +0100 Subject: perf record: Add switch-output time option argument It's now possible to specify the threshold time for perf.data like: $ perf record --switch-output=30s ... Once it's reached, the current data are dumped in to the perf.data. file and session does on. $ perf record --switch-output=30s ... [ perf record: dump data: Woken up 44 times ] [ perf record: Dump perf.data.2017010213043746 ] ... The time is expected to be a number with appended unit character - s/m/h/d. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/builtin-record.c | 44 ++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3d55d2fd48b3..27256bc68eda 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -427,6 +427,8 @@ based on 'mode' value: "signal" - when receiving a SIGUSR2 (default value) or - when reaching the size threshold, size is expected to be a number with appended unit character - B/K/M/G +