Diffstat (limited to 'kernel')
50 files changed, 1307 insertions, 851 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index cacd8684c3c4..5f6f3f829b36 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1953,6 +1953,11 @@ out: CONT; \ LDX_MEM_##SIZEOP: \ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ + CONT; \ + LDX_PROBE_MEM_##SIZEOP: \ + bpf_probe_read_kernel(&DST, sizeof(SIZE), \ + (const void *)(long) (SRC + insn->off)); \ + DST = *((SIZE *)&DST); \ CONT; LDST(B, u8) @@ -1960,15 +1965,6 @@ out: LDST(W, u32) LDST(DW, u64) #undef LDST -#define LDX_PROBE(SIZEOP, SIZE) \ - LDX_PROBE_MEM_##SIZEOP: \ - bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ - CONT; - LDX_PROBE(B, 1) - LDX_PROBE(H, 2) - LDX_PROBE(W, 4) - LDX_PROBE(DW, 8) -#undef LDX_PROBE #define ATOMIC_ALU_OP(BOP, KOP) \ case BOP: \ diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4d57c03714f4..71122e01623c 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -222,9 +222,6 @@ next: p = strstr(p+1, name); } - if (!ck_cmdline) - return NULL; - return ck_cmdline; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 950b25c3f210..80782cddb1da 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4257,7 +4257,6 @@ static void perf_event_remove_on_exec(int ctxn) { struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_event *event, *next; - LIST_HEAD(free_list); unsigned long flags; bool modified = false; diff --git a/kernel/fork.c b/kernel/fork.c index 124829ed0163..9d44f2d46c69 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1982,7 +1982,7 @@ static __latent_entropy struct task_struct *copy_process( struct task_struct *p; struct multiprocess_signals delayed; struct file *pidfile = NULL; - u64 clone_flags = args->flags; + const u64 clone_flags = args->flags; struct nsproxy *nsp = current->nsproxy; /* @@ -2071,6 +2071,9 @@ static __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + p->flags &= ~PF_KTHREAD; + if (args->kthread) + p->flags |= PF_KTHREAD; if (args->io_thread) { /* * Mark us an IO worker, and block any signal that isn't @@ -2160,7 +2163,7 @@ static __latent_entropy struct task_struct *copy_process( p->io_context = NULL; audit_set_context(p, NULL); cgroup_fork(p); - if (p->flags & PF_KTHREAD) { + if (args->kthread) { if (!set_kthread_struct(p)) goto bad_fork_cleanup_delayacct; } @@ -2243,7 +2246,7 @@ static __latent_entropy struct task_struct *copy_process( retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); + retval = copy_thread(p, args); if (retval) goto bad_fork_cleanup_io; @@ -2547,11 +2550,21 @@ static inline void init_idle_pids(struct task_struct *idle) } } +static int idle_dummy(void *dummy) +{ + /* This function is never called */ + return 0; +} + struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { - .flags = CLONE_VM, + .flags = CLONE_VM, + .fn = &idle_dummy, + .fn_arg = NULL, + .kthread = 1, + .idle = 1, }; task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); @@ -2582,8 +2595,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, + .fn = fn, + .fn_arg = arg, .io_thread = 1, }; @@ -2687,8 +2700,25 @@ pid_t kernel_thread(int (*fn)(void *), 
void *arg, unsigned long flags) .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, + .fn = fn, + .fn_arg = arg, + .kthread = 1, + }; + + return kernel_clone(&args); +} + +/* + * Create a user mode thread. + */ +pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), + .fn = fn, + .fn_arg = arg, }; return kernel_clone(&args); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 02a65d554340..80bfea5dd5c4 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -73,7 +73,7 @@ static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; * hung task is detected: */ unsigned int __read_mostly sysctl_hung_task_panic = - CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; + IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC); static int hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) diff --git a/kernel/kcov.c b/kernel/kcov.c index b3732b210593..e19c84b02452 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -204,8 +204,16 @@ void notrace __sanitizer_cov_trace_pc(void) /* The first 64-bit word is the number of subsequent PCs. */ pos = READ_ONCE(area[0]) + 1; if (likely(pos < t->kcov_size)) { - area[pos] = ip; + /* Previously we write pc before updating pos. However, some + * early interrupt code could bypass check_kcov_mode() check + * and invoke __sanitizer_cov_trace_pc(). If such interrupt is + * raised between writing pc and updating pos, the pc could be + * overitten by the recursive __sanitizer_cov_trace_pc(). + * Update pos before writing pc to avoid such interleaving. + */ WRITE_ONCE(area[0], pos); + barrier(); + area[pos] = ip; } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); @@ -236,11 +244,13 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) start_index = 1 + count * KCOV_WORDS_PER_CMP; end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); if (likely(end_pos <= max_pos)) { + /* See comment in __sanitizer_cov_trace_pc(). */ + WRITE_ONCE(area[0], count + 1); + barrier(); area[start_index] = type; area[start_index + 1] = arg1; area[start_index + 2] = arg2; area[start_index + 3] = ip; - WRITE_ONCE(area[0], count + 1); } } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8b3e9a2014cf..4d34c78334ce 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -768,7 +768,6 @@ static struct page *kimage_alloc_page(struct kimage *image, kimage_free_pages(old_page); continue; } - addr = old_addr; page = old_page; break; } @@ -788,7 +787,6 @@ static int kimage_load_normal_segment(struct kimage *image, unsigned char __user *buf = NULL; unsigned char *kbuf = NULL; - result = 0; if (image->file_mode) kbuf = segment->kbuf; else diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 8347fc158d2b..145321a5e798 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -109,40 +109,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, #endif /* - * arch_kexec_apply_relocations_add - apply relocations of type RELA - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELAs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. 
- */ -int __weak -arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("RELA relocation unsupported.\n"); - return -ENOEXEC; -} - -/* - * arch_kexec_apply_relocations - apply relocations of type REL - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("REL relocation unsupported.\n"); - return -ENOEXEC; -} - -/* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have * been loaded into separate segments and have been copied elsewhere. @@ -1260,7 +1226,7 @@ int crash_exclude_mem_range(struct crash_mem *mem, return 0; } -int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, +int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { Elf64_Ehdr *ehdr; @@ -1324,7 +1290,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, phdr++; /* Prepare PT_LOAD type program header for kernel text region */ - if (kernel_map) { + if (need_kernel_map) { phdr->p_type = PT_LOAD; phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_vaddr = (unsigned long) _text; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dd58c0be9ce2..f214f8c088ed 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1257,79 +1257,6 @@ void kprobe_busy_end(void) preempt_enable(); } -#if !defined(CONFIG_KRETPROBE_ON_RETHOOK) -static void free_rp_inst_rcu(struct rcu_head *head) -{ - struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu); - - if (refcount_dec_and_test(&ri->rph->ref)) - kfree(ri->rph); - kfree(ri); -} -NOKPROBE_SYMBOL(free_rp_inst_rcu); - -static void recycle_rp_inst(struct kretprobe_instance *ri) -{ - struct kretprobe *rp = get_kretprobe(ri); - - if (likely(rp)) - freelist_add(&ri->freelist, &rp->freelist); - else - call_rcu(&ri->rcu, free_rp_inst_rcu); -} -NOKPROBE_SYMBOL(recycle_rp_inst); - -/* - * This function is called from delayed_put_task_struct() when a task is - * dead and cleaned up to recycle any kretprobe instances associated with - * this task. These left over instances represent probed functions that - * have been called but will never return. - */ -void kprobe_flush_task(struct task_struct *tk) -{ - struct kretprobe_instance *ri; - struct llist_node *node; - - /* Early boot, not yet initialized. */ - if (unlikely(!kprobes_initialized)) - return; - - kprobe_busy_begin(); - - node = __llist_del_all(&tk->kretprobe_instances); - while (node) { - ri = container_of(node, struct kretprobe_instance, llist); - node = node->next; - - recycle_rp_inst(ri); - } - - kprobe_busy_end(); -} -NOKPROBE_SYMBOL(kprobe_flush_task); - -static inline void free_rp_inst(struct kretprobe *rp) -{ - struct kretprobe_instance *ri; - struct freelist_node *node; - int count = 0; - - node = rp->freelist.head; - while (node) { - ri = container_of(node, struct kretprobe_instance, freelist); - node = node->next; - - kfree(ri); - count++; - } - - if (refcount_sub_and_test(count, &rp->rph->ref)) { - kfree(rp->rph); - rp->rph = NULL; - } -} -#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */ - /* Add the new probe to 'ap->list'. 
*/ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) { @@ -1928,6 +1855,77 @@ static struct notifier_block kprobe_exceptions_nb = { #ifdef CONFIG_KRETPROBES #if !defined(CONFIG_KRETPROBE_ON_RETHOOK) +static void free_rp_inst_rcu(struct rcu_head *head) +{ + struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu); + + if (refcount_dec_and_test(&ri->rph->ref)) + kfree(ri->rph); + kfree(ri); +} +NOKPROBE_SYMBOL(free_rp_inst_rcu); + +static void recycle_rp_inst(struct kretprobe_instance *ri) +{ + struct kretprobe *rp = get_kretprobe(ri); + + if (likely(rp)) + freelist_add(&ri->freelist, &rp->freelist); + else + call_rcu(&ri->rcu, free_rp_inst_rcu); +} +NOKPROBE_SYMBOL(recycle_rp_inst); + +/* + * This function is called from delayed_put_task_struct() when a task is + * dead and cleaned up to recycle any kretprobe instances associated with + * this task. These left over instances represent probed functions that + * have been called but will never return. + */ +void kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + struct llist_node *node; + + /* Early boot, not yet initialized. */ + if (unlikely(!kprobes_initialized)) + return; + + kprobe_busy_begin(); + + node = __llist_del_all(&tk->kretprobe_instances); + while (node) { + ri = container_of(node, struct kretprobe_instance, llist); + node = node->next; + + recycle_rp_inst(ri); + } + + kprobe_busy_end(); +} +NOKPROBE_SYMBOL(kprobe_flush_task); + +static inline void free_rp_inst(struct kretprobe *rp) +{ + struct kretprobe_instance *ri; + struct freelist_node *node; + int count = 0; + + node = rp->freelist.head; + while (node) { + ri = container_of(node, struct kretprobe_instance, freelist); + node = node->next; + + kfree(ri); + count++; + } + + if (refcount_sub_and_test(count, &rp->rph->ref)) { + kfree(rp->rph); + rp->rph = NULL; + } +} + /* This assumes the 'tsk' is the current task or the is not running. */ static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk, struct llist_node **cur) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index c172bf92b576..4c4f5a776d80 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -118,7 +118,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, if (func->nop) goto unlock; - klp_arch_set_pc(fregs, (unsigned long)func->new_func); + ftrace_instruction_pointer_set(fregs, (unsigned long)func->new_func); unlock: ftrace_test_recursion_unlock(bit); diff --git a/kernel/module/signing.c b/kernel/module/signing.c index 85c8999dfecf..a2ff4242e623 100644 --- a/kernel/module/signing.c +++ b/kernel/module/signing.c @@ -16,6 +16,9 @@ #include <uapi/linux/module.h> #include "internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "module." 
+ static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); diff --git a/kernel/notifier.c b/kernel/notifier.c index ba005ebf4730..0d5bd62c480e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -20,7 +20,8 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); */ static int notifier_chain_register(struct notifier_block **nl, - struct notifier_block *n) + struct notifier_block *n, + bool unique_priority) { while ((*nl) != NULL) { if (unlikely((*nl) == n)) { @@ -30,6 +31,8 @@ static int notifier_chain_register(struct notifier_block **nl, } if (n->priority > (*nl)->priority) break; + if (n->priority == (*nl)->priority && unique_priority) + return -EBUSY; nl = &((*nl)->next); } n->next = *nl; @@ -144,13 +147,36 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, int ret; spin_lock_irqsave(&nh->lock, flags); - ret = notifier_chain_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n, false); spin_unlock_irqrestore(&nh->lock, flags); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); /** + * atomic_notifier_chain_register_unique_prio - Add notifier to an atomic notifier chain + * @nh: Pointer to head of the atomic notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to an atomic notifier chain if there is no other + * notifier registered using the same priority. + * + * Returns 0 on success, %-EEXIST or %-EBUSY on error. + */ +int atomic_notifier_chain_register_unique_prio(struct atomic_notifier_head *nh, + struct notifier_block *n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_register(&nh->head, n, true); + spin_unlock_irqrestore(&nh->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(atomic_notifier_chain_register_unique_prio); + +/** * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @n: Entry to remove from notifier chain @@ -204,23 +230,27 @@ int atomic_notifier_call_chain(struct atomic_notifier_head *nh, EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); +/** + * atomic_notifier_call_chain_is_empty - Check whether notifier chain is empty + * @nh: Pointer to head of the atomic notifier chain + * + * Checks whether notifier chain is empty. + * + * Returns true is notifier chain is empty, false otherwise. + */ +bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh) +{ + return !rcu_access_pointer(nh->head); +} + /* * Blocking notifier chain routines. All access to the chain is * synchronized by an rwsem. */ -/** - * blocking_notifier_chain_register - Add notifier to a blocking notifier chain - * @nh: Pointer to head of the blocking notifier chain - * @n: New entry in notifier chain - * - * Adds a notifier to a blocking notifier chain. - * Must be called in process context. - * - * Returns 0 on success, %-EEXIST on error. - */ -int blocking_notifier_chain_register(struct blocking_notifier_head *nh, - struct notifier_block *n) +static int __blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *n, + bool unique_priority) { int ret; @@ -230,16 +260,49 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, * such times we must not call down_write(). 
*/ if (unlikely(system_state == SYSTEM_BOOTING)) - return notifier_chain_register(&nh->head, n); + return notifier_chain_register(&nh->head, n, unique_priority); down_write(&nh->rwsem); - ret = notifier_chain_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n, unique_priority); up_write(&nh->rwsem); return ret; } + +/** + * blocking_notifier_chain_register - Add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to a blocking notifier chain. + * Must be called in process context. + * + * Returns 0 on success, %-EEXIST on error. + */ +int blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + return __blocking_notifier_chain_register(nh, n, false); +} EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); /** + * blocking_notifier_chain_register_unique_prio - Add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to an blocking notifier chain if there is no other + * notifier registered using the same priority. + * + * Returns 0 on success, %-EEXIST or %-EBUSY on error. + */ +int blocking_notifier_chain_register_unique_prio(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + return __blocking_notifier_chain_register(nh, n, true); +} +EXPORT_SYMBOL_GPL(blocking_notifier_chain_register_unique_prio); + +/** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: Entry to remove from notifier chain @@ -341,7 +404,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *n) { - return notifier_chain_register(&nh->head, n); + return notifier_chain_register(&nh->head, n, false); } EXPORT_SYMBOL_GPL(raw_notifier_chain_register); @@ -420,10 +483,10 @@ int srcu_notifier_chain_register(struct srcu_notifier_head *nh, * such times we must not call mutex_lock(). */ if (unlikely(system_state == SYSTEM_BOOTING)) - return notifier_chain_register(&nh->head, n); + return notifier_chain_register(&nh->head, n, false); mutex_lock(&nh->mutex); - ret = notifier_chain_register(&nh->head, n); + ret = notifier_chain_register(&nh->head, n, false); mutex_unlock(&nh->mutex); return ret; } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a46a3723bc66..f4f8cb0435b4 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -52,7 +52,7 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) /* Name collision forces to do allocation under mutex. */ if (!*pkc) *pkc = kmem_cache_create(name, len, 0, - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); mutex_unlock(&pid_caches_mutex); /* current can fail, but someone else can succeed. 
*/ return READ_ONCE(*pkc); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 938d5c78b421..20a66bf9f465 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -83,7 +83,7 @@ bool hibernation_available(void) { return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION) && - !secretmem_active(); + !secretmem_active() && !cxl_mem_active(); } /** diff --git a/kernel/power/main.c b/kernel/power/main.c index 5242bf2ee469..e3694034b753 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -127,7 +127,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@ -136,6 +138,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. */ if (s != buf) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6fcdee7e87a5..827075944d28 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -236,7 +236,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem); static bool sleep_state_supported(suspend_state_t state) { - return state == PM_SUSPEND_TO_IDLE || valid_state(state); + return state == PM_SUSPEND_TO_IDLE || + (valid_state(state) && !cxl_mem_active()); } static int platform_suspend_prepare(suspend_state_t state) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a3e1035929b0..ea3dd55709e7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3904,7 +3904,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) } if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible_all(&log_wait); + wake_up_interruptible(&log_wait); } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ccc4b465775b..156a99283b11 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(struct task_struct *task) return true; } -/* Ensure that nothing can wake it up, even SIGKILL */ +/* + * Ensure that nothing can wake it up, even SIGKILL + * + * A task is switched to this state while a ptrace operation is in progress; + * such that the ptrace operation is uninterruptible. + */ static bool ptrace_freeze_traced(struct task_struct *task) { bool ret = false; @@ -197,7 +202,7 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { - WRITE_ONCE(task->__state, __TASK_TRACED); + task->jobctl |= JOBCTL_PTRACE_FROZEN; ret = true; } spin_unlock_irq(&task->sighand->siglock); @@ -207,23 +212,21 @@ static bool ptrace_freeze_traced(struct task_struct *task) static void ptrace_unfreeze_traced(struct task_struct *task) { - if (READ_ONCE(task->__state) != __TASK_TRACED) - return; - - WARN_ON(!task->ptrace || task->parent != current); + unsigned long flags; /* - * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. - * Recheck state under the lock to close this race. + * The child may be awake and may have cleared + * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will + * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. 
*/ - spin_lock_irq(&task->sighand->siglock); - if (READ_ONCE(task->__state) == __TASK_TRACED) { - if (__fatal_signal_pending(task)) + if (lock_task_sighand(task, &flags)) { + task->jobctl &= ~JOBCTL_PTRACE_FROZEN; + if (__fatal_signal_pending(task)) { + task->jobctl &= ~TASK_TRACED; wake_up_state(task, __TASK_TRACED); - else - WRITE_ONCE(task->__state, TASK_TRACED); + } + unlock_task_sighand(task, &flags); } - spin_unlock_irq(&task->sighand->siglock); } /** @@ -256,7 +259,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). @@ -266,17 +268,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) { - if (!wait_task_inactive(child, __TASK_TRACED)) { - /* - * This can only happen if may_ptrace_stop() fails and - * ptrace_stop() changes ->state back to TASK_RUNNING, - * so we should not worry about leaking __TASK_TRACED. - */ - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); - ret = -ESRCH; - } - } + if (!ret && !ignore_state && + WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED))) + ret = -ESRCH; return ret; } @@ -475,8 +469,10 @@ static int ptrace_attach(struct task_struct *task, long request, * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task) && - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { + task->jobctl &= ~JOBCTL_STOPPED; signal_wake_up_state(task, __TASK_STOPPED); + } spin_unlock(&task->sighand->siglock); @@ -829,11 +825,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, } #endif -#ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) -#else -#define is_singlestep(request) 0 -#endif #ifdef PTRACE_SINGLEBLOCK #define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) @@ -850,8 +842,6 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { - bool need_siglock; - if (!valid_signal(data)) return -EIO; @@ -887,18 +877,12 @@ static int ptrace_resume(struct task_struct *child, long request, * Note that we need siglock even if ->exit_code == data and/or this * status was not reported yet, the new status must not be cleared by * wait_task_stopped() after resume. - * - * If data == 0 we do not care if wait_task_stopped() reports the old - * status and clears the code too; this can't race with the tracee, it - * takes siglock after resume. 
*/ - need_siglock = data && !thread_group_empty(current); - if (need_siglock) - spin_lock_irq(&child->sighand->siglock); + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; + child->jobctl &= ~JOBCTL_TRACED; wake_up_state(child, __TASK_TRACED); - if (need_siglock) - spin_unlock_irq(&child->sighand->siglock); + spin_unlock_irq(&child->sighand->siglock); return 0; } @@ -1221,9 +1205,7 @@ int ptrace_request(struct task_struct *child, long request, } #endif -#ifdef PTRACE_SINGLESTEP case PTRACE_SINGLESTEP: -#endif #ifdef PTRACE_SINGLEBLOCK case PTRACE_SINGLEBLOCK: #endif @@ -1236,9 +1218,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: @@ -1285,10 +1266,6 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -#ifndef arch_ptrace_attach -#define arch_ptrace_attach(child) do { } while (0) -#endif - SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, unsigned long, data) { @@ -1297,8 +1274,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); - if (!ret) - arch_ptrace_attach(current); goto out; } @@ -1310,12 +1285,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } @@ -1455,12 +1424,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); - /* - * Some architectures need to do book-keeping after - * a ptrace attach. - */ - if (!ret) - arch_ptrace_attach(child); goto out_put_task_struct; } diff --git a/kernel/reboot.c b/kernel/reboot.c index 44228a93742b..a091145ee710 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -48,12 +48,20 @@ int reboot_cpu; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; +struct sys_off_handler { + struct notifier_block nb; + int (*sys_off_cb)(struct sys_off_data *data); + void *cb_data; + enum sys_off_mode mode; + bool blocking; + void *list; +}; + /* - * If set, this is used for preparing the system to power off. + * Temporary stub that prevents linkage failure while we're in process + * of removing all uses of legacy pm_power_off() around the kernel. */ - -void (*pm_power_off_prepare)(void); -EXPORT_SYMBOL_GPL(pm_power_off_prepare); +void __weak (*pm_power_off)(void); /** * emergency_restart - reboot the system @@ -281,6 +289,316 @@ void kernel_halt(void) } EXPORT_SYMBOL_GPL(kernel_halt); +/* + * Notifier list for kernel code which wants to be called + * to prepare system for power off. + */ +static BLOCKING_NOTIFIER_HEAD(power_off_prep_handler_list); + +/* + * Notifier list for kernel code which wants to be called + * to power off system. 
+ */ +static ATOMIC_NOTIFIER_HEAD(power_off_handler_list); + +static int sys_off_notify(struct notifier_block *nb, + unsigned long mode, void *cmd) +{ + struct sys_off_handler *handler; + struct sys_off_data data = {}; + + handler = container_of(nb, struct sys_off_handler, nb); + data.cb_data = handler->cb_data; + data.mode = mode; + data.cmd = cmd; + + return handler->sys_off_cb(&data); +} + +/** + * register_sys_off_handler - Register sys-off handler + * @mode: Sys-off mode + * @priority: Handler priority + * @callback: Callback function + * @cb_data: Callback argument + * + * Registers system power-off or restart handler that will be invoked + * at the step corresponding to the given sys-off mode. Handler's callback + * should return NOTIFY_DONE to permit execution of the next handler in + * the call chain or NOTIFY_STOP to break the chain (in error case for + * example). + * + * Multiple handlers can be registered at the default priority level. + * + * Only one handler can be registered at the non-default priority level, + * otherwise ERR_PTR(-EBUSY) is returned. + * + * Returns a new instance of struct sys_off_handler on success, or + * an ERR_PTR()-encoded error code otherwise. + */ +struct sys_off_handler * +register_sys_off_handler(enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + struct sys_off_handler *handler; + int err; + + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler) + return ERR_PTR(-ENOMEM); + + switch (mode) { + case SYS_OFF_MODE_POWER_OFF_PREPARE: + handler->list = &power_off_prep_handler_list; + handler->blocking = true; + break; + + case SYS_OFF_MODE_POWER_OFF: + handler->list = &power_off_handler_list; + break; + + case SYS_OFF_MODE_RESTART: + handler->list = &restart_handler_list; + break; + + default: + kfree(handler); + return ERR_PTR(-EINVAL); + } + + handler->nb.notifier_call = sys_off_notify; + handler->nb.priority = priority; + handler->sys_off_cb = callback; + handler->cb_data = cb_data; + handler->mode = mode; + + if (handler->blocking) { + if (priority == SYS_OFF_PRIO_DEFAULT) + err = blocking_notifier_chain_register(handler->list, + &handler->nb); + else + err = blocking_notifier_chain_register_unique_prio(handler->list, + &handler->nb); + } else { + if (priority == SYS_OFF_PRIO_DEFAULT) + err = atomic_notifier_chain_register(handler->list, + &handler->nb); + else + err = atomic_notifier_chain_register_unique_prio(handler->list, + &handler->nb); + } + + if (err) { + kfree(handler); + return ERR_PTR(err); + } + + return handler; +} +EXPORT_SYMBOL_GPL(register_sys_off_handler); + +/** + * unregister_sys_off_handler - Unregister sys-off handler + * @handler: Sys-off handler + * + * Unregisters given sys-off handler. 
+ */ +void unregister_sys_off_handler(struct sys_off_handler *handler) +{ + int err; + + if (!handler) + return; + + if (handler->blocking) + err = blocking_notifier_chain_unregister(handler->list, + &handler->nb); + else + err = atomic_notifier_chain_unregister(handler->list, + &handler->nb); + + /* sanity check, shall never happen */ + WARN_ON(err); + + kfree(handler); +} +EXPORT_SYMBOL_GPL(unregister_sys_off_handler); + +static void devm_unregister_sys_off_handler(void *data) +{ + struct sys_off_handler *handler = data; + + unregister_sys_off_handler(handler); +} + +/** + * devm_register_sys_off_handler - Register sys-off handler + * @dev: Device that registers handler + * @mode: Sys-off mode + * @priority: Handler priority + * @callback: Callback function + * @cb_data: Callback argument + * + * Registers resource-managed sys-off handler. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_sys_off_handler(struct device *dev, + enum sys_off_mode mode, + int priority, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + struct sys_off_handler *handler; + + handler = register_sys_off_handler(mode, priority, callback, cb_data); + if (IS_ERR(handler)) + return PTR_ERR(handler); + + return devm_add_action_or_reset(dev, devm_unregister_sys_off_handler, + handler); +} +EXPORT_SYMBOL_GPL(devm_register_sys_off_handler); + +/** + * devm_register_power_off_handler - Register power-off handler + * @dev: Device that registers callback + * @callback: Callback function + * @cb_data: Callback's argument + * + * Registers resource-managed sys-off handler with a default priority + * and using power-off mode. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_power_off_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + return devm_register_sys_off_handler(dev, + SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + callback, cb_data); +} +EXPORT_SYMBOL_GPL(devm_register_power_off_handler); + +/** + * devm_register_restart_handler - Register restart handler + * @dev: Device that registers callback + * @callback: Callback function + * @cb_data: Callback's argument + * + * Registers resource-managed sys-off handler with a default priority + * and using restart mode. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_restart_handler(struct device *dev, + int (*callback)(struct sys_off_data *data), + void *cb_data) +{ + return devm_register_sys_off_handler(dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_DEFAULT, + callback, cb_data); +} +EXPORT_SYMBOL_GPL(devm_register_restart_handler); + +static struct sys_off_handler *platform_power_off_handler; + +static int platform_power_off_notify(struct sys_off_data *data) +{ + void (*platform_power_power_off_cb)(void) = data->cb_data; + + platform_power_power_off_cb(); + + return NOTIFY_DONE; +} + +/** + * register_platform_power_off - Register platform-level power-off callback + * @power_off: Power-off callback + * + * Registers power-off callback that will be called as last step + * of the power-off sequence. This callback is expected to be invoked + * for the last resort. Only one platform power-off callback is allowed + * to be registered at a time. + * + * Returns zero on success, or error code on failure. 
+ */ +int register_platform_power_off(void (*power_off)(void)) +{ + struct sys_off_handler *handler; + + handler = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_PLATFORM, + platform_power_off_notify, + power_off); + if (IS_ERR(handler)) + return PTR_ERR(handler); + + platform_power_off_handler = handler; + + return 0; +} +EXPORT_SYMBOL_GPL(register_platform_power_off); + +/** + * unregister_platform_power_off - Unregister platform-level power-off callback + * @power_off: Power-off callback + * + * Unregisters previously registered platform power-off callback. + */ +void unregister_platform_power_off(void (*power_off)(void)) +{ + if (platform_power_off_handler && + platform_power_off_handler->cb_data == power_off) { + unregister_sys_off_handler(platform_power_off_handler); + platform_power_off_handler = NULL; + } +} +EXPORT_SYMBOL_GPL(unregister_platform_power_off); + +static int legacy_pm_power_off(struct sys_off_data *data) +{ + if (pm_power_off) + pm_power_off(); + + return NOTIFY_DONE; +} + +static void do_kernel_power_off_prepare(void) +{ + blocking_notifier_call_chain(&power_off_prep_handler_list, 0, NULL); +} + +/** + * do_kernel_power_off - Execute kernel power-off handler call chain + * + * Expected to be called as last step of the power-off sequence. + * + * Powers off the system immediately if a power-off handler function has + * been registered. Otherwise does nothing. + */ +void do_kernel_power_off(void) +{ + atomic_notifier_call_chain(&power_off_handler_list, 0, NULL); +} + +/** + * kernel_can_power_off - check whether system can be powered off + * + * Returns true if power-off handler is registered and system can be + * powered off, false otherwise. + */ +bool kernel_can_power_off(void) +{ + return !atomic_notifier_call_chain_is_empty(&power_off_handler_list); +} +EXPORT_SYMBOL_GPL(kernel_can_power_off); + /** * kernel_power_off - power_off the system * @@ -289,8 +607,7 @@ EXPORT_SYMBOL_GPL(kernel_halt); void kernel_power_off(void) { kernel_shutdown_prepare(SYSTEM_POWER_OFF); - if (pm_power_off_prepare) - pm_power_off_prepare(); + do_kernel_power_off_prepare(); migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("Power down\n"); @@ -313,6 +630,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { struct pid_namespace *pid_ns = task_active_pid_ns(current); + struct sys_off_handler *sys_off = NULL; char buffer[256]; int ret = 0; @@ -337,10 +655,25 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, if (ret) return ret; + /* + * Register sys-off handlers for legacy PM callback. This allows + * legacy PM callbacks temporary co-exist with the new sys-off API. + * + * TODO: Remove legacy handlers once all legacy PM users will be + * switched to the sys-off based APIs. + */ + if (pm_power_off) { + sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + legacy_pm_power_off, NULL); + if (IS_ERR(sys_off)) + return PTR_ERR(sys_off); + } + /* Instead of trying to make the power_off code look like * halt when pm_power_off is not set do it the easy way. 
*/ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !kernel_can_power_off()) cmd = LINUX_REBOOT_CMD_HALT; mutex_lock(&system_transition_mutex); @@ -394,6 +727,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, break; } mutex_unlock(&system_transition_mutex); + unregister_sys_off_handler(sys_off); return ret; } diff --git a/kernel/relay.c b/kernel/relay.c index d1a67fbb819d..6a611e779e95 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -440,7 +440,7 @@ int relay_prepare_cpu(unsigned int cpu) mutex_lock(&relay_channels_mutex); list_for_each_entry(chan, &relay_channels, list) { - if ((buf = *per_cpu_ptr(chan->buf, cpu))) + if (*per_cpu_ptr(chan->buf, cpu)) continue; buf = relay_open_buf(chan, cpu); if (!buf) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 696c6490bd5b..bfa7452ca92e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6353,10 +6353,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) /* * We must load prev->state once (task_struct::state is volatile), such - * that: - * - * - we form a control dependency vs deactivate_task() below. - * - ptrace_{,un}freeze_traced() can change ->state underneath us. + * that we form a control dependency vs deactivate_task() below. */ prev_state = READ_ONCE(prev->__state); if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8c5b74f66bd3..77b2048a9326 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2927,7 +2927,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr) /* * We don't care about NUMA placement if we don't have memory. */ - if ((curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work) + if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work) return; /* diff --git a/kernel/signal.c b/kernel/signal.c index e43bc2a692f5..edb1dc9b00dc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -762,7 +762,10 @@ still_pending: */ void signal_wake_up_state(struct task_struct *t, unsigned int state) { + lockdep_assert_held(&t->sighand->siglock); + set_tsk_thread_flag(t, TIF_SIGPENDING); + /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. 
We don't check t->state here because there is a race with it @@ -884,7 +887,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, static void ptrace_trap_notify(struct task_struct *t) { WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); - assert_spin_locked(&t->sighand->siglock); + lockdep_assert_held(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); @@ -930,9 +933,10 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) for_each_thread(p, t) { flush_sigqueue_mask(&flush, &t->pending); task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); - if (likely(!(t->ptrace & PT_SEIZED))) + if (likely(!(t->ptrace & PT_SEIZED))) { + t->jobctl &= ~JOBCTL_STOPPED; wake_up_state(t, __TASK_STOPPED); - else + } else ptrace_trap_notify(t); } @@ -1071,15 +1075,15 @@ static inline bool legacy_queue(struct sigpending *signals, int sig) return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } -static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, - enum pid_type type, bool force) +static int __send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *t, enum pid_type type, bool force) { struct sigpending *pending; struct sigqueue *q; int override_rlimit; int ret = 0, result; - assert_spin_locked(&t->sighand->siglock); + lockdep_assert_held(&t->sighand->siglock); result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, force)) @@ -1212,8 +1216,8 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) return ret; } -static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, - enum pid_type type) +int send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *t, enum pid_type type) { /* Should SIGKILL or SIGSTOP be received by a pid namespace init? 
*/ bool force = false; @@ -1245,7 +1249,7 @@ static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct force = true; } } - return __send_signal(sig, info, t, type, force); + return __send_signal_locked(sig, info, t, type, force); } static void print_fatal_signal(int signr) @@ -1281,12 +1285,6 @@ static int __init setup_print_fatal_signals(char *str) __setup("print-fatal-signals=", setup_print_fatal_signals); -int -__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) -{ - return send_signal(sig, info, p, PIDTYPE_TGID); -} - int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { @@ -1294,7 +1292,7 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p int ret = -ESRCH; if (lock_task_sighand(p, &flags)) { - ret = send_signal(sig, info, p, type); + ret = send_signal_locked(sig, info, p, type); unlock_task_sighand(p, &flags); } @@ -1347,7 +1345,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, if (action->sa.sa_handler == SIG_DFL && (!t->ptrace || (handler == HANDLER_EXIT))) t->signal->flags &= ~SIGNAL_UNKILLABLE; - ret = send_signal(sig, info, t, PIDTYPE_PID); + ret = send_signal_locked(sig, info, t, PIDTYPE_PID); spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; @@ -1567,7 +1565,7 @@ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, if (sig) { if (lock_task_sighand(p, &flags)) { - ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false); + ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); unlock_task_sighand(p, &flags); } else ret = -ESRCH; @@ -2114,7 +2112,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * parent's namespaces. */ if (valid_signal(sig) && sig) - __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false); + __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); @@ -2184,7 +2182,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, spin_lock_irqsave(&sighand->siglock, flags); if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - __group_send_sig_info(SIGCHLD, &info, parent); + send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); /* * Even if SIGCHLD is not generated, we must wake up wait4 calls. */ @@ -2204,13 +2202,12 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, * with. If the code did not stop because the tracer is gone, * the stop signal remains unchanged unless clear_code. */ -static int ptrace_stop(int exit_code, int why, int clear_code, - unsigned long message, kernel_siginfo_t *info) +static int ptrace_stop(int exit_code, int why, unsigned long message, + kernel_siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { bool gstop_done = false; - bool read_code = true; if (arch_ptrace_stop_needed()) { /* @@ -2227,10 +2224,16 @@ static int ptrace_stop(int exit_code, int why, int clear_code, } /* - * schedule() will not sleep if there is a pending signal that - * can awaken the task. + * After this point ptrace_signal_wake_up or signal_wake_up + * will clear TASK_TRACED if ptrace_unlink happens or a fatal + * signal comes in. Handle previous ptrace_unlinks and fatal + * signals here to prevent ptrace_stop sleeping in schedule. 
*/ + if (!current->ptrace || __fatal_signal_pending(current)) + return exit_code; + set_special_state(TASK_TRACED); + current->jobctl |= JOBCTL_TRACED; /* * We're committing to trapping. TRACED should be visible before @@ -2276,54 +2279,33 @@ static int ptrace_stop(int exit_code, int why, int clear_code, spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); - if (likely(current->ptrace)) { - /* - * Notify parents of the stop. - * - * While ptraced, there are two parents - the ptracer and - * the real_parent of the group_leader. The ptracer should - * know about every stop while the real parent is only - * interested in the completion of group stop. The states - * for the two don't interact with each other. Notify - * separately unless they're gonna be duplicates. - */ + /* + * Notify parents of the stop. + * + * While ptraced, there are two parents - the ptracer and + * the real_parent of the group_leader. The ptracer should + * know about every stop while the real parent is only + * interested in the completion of group stop. The states + * for the two don't interact with each other. Notify + * separately unless they're gonna be duplicates. + */ + if (current->ptrace) do_notify_parent_cldstop(current, true, why); - if (gstop_done && ptrace_reparented(current)) - do_notify_parent_cldstop(current, false, why); + if (gstop_done && (!current->ptrace || ptrace_reparented(current))) + do_notify_parent_cldstop(current, false, why); - /* - * Don't want to allow preemption here, because - * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). - */ - preempt_disable(); - read_unlock(&tasklist_lock); - cgroup_enter_frozen(); - preempt_enable_no_resched(); - freezable_schedule(); - cgroup_leave_frozen(true); - } else { - /* - * By the time we got the lock, our tracer went away. - * Don't drop the lock yet, another tracer may come. - * - * If @gstop_done, the ptracer went away between group stop - * completion and here. During detach, it would have set - * JOBCTL_STOP_PENDING on us and we'll re-enter - * TASK_STOPPED in do_signal_stop() on return, so notifying - * the real parent of the group stop completion is enough. - */ - if (gstop_done) - do_notify_parent_cldstop(current, false, why); - - /* tasklist protects us from ptrace_freeze_traced() */ - __set_current_state(TASK_RUNNING); - read_code = false; - if (clear_code) - exit_code = 0; - read_unlock(&tasklist_lock); - } + /* + * Don't want to allow preemption here, because + * sys_ptrace() needs this task to be inactive. + * + * XXX: implement read_unlock_no_resched(). + */ + preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); + preempt_enable_no_resched(); + freezable_schedule(); + cgroup_leave_frozen(true); /* * We are back. Now reacquire the siglock before touching @@ -2331,14 +2313,13 @@ static int ptrace_stop(int exit_code, int why, int clear_code, * any signal-sending on another CPU that wants to examine it. */ spin_lock_irq(¤t->sighand->siglock); - if (read_code) - exit_code = current->exit_code; + exit_code = current->exit_code; current->last_siginfo = NULL; current->ptrace_message = 0; current->exit_code = 0; /* LISTENING can be set only during STOP traps, clear it */ - current->jobctl &= ~JOBCTL_LISTENING; + current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); /* * Queued signals ignored us while we were stopped for tracing. 
@@ -2360,7 +2341,7 @@ static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long mes info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ - return ptrace_stop(exit_code, why, 1, message, &info); + return ptrace_stop(exit_code, why, message, &info); } int ptrace_notify(int exit_code, unsigned long message) @@ -2471,6 +2452,7 @@ static bool do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; + current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); @@ -2532,7 +2514,7 @@ static void do_jobctl_trap(void) CLD_STOPPED, 0); } else { WARN_ON_ONCE(!signr); - ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); + ptrace_stop(signr, CLD_STOPPED, 0, NULL); } } @@ -2585,7 +2567,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; - signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); + signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ if (signr == 0) @@ -2612,7 +2594,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) /* If the (new) signal is now blocked, requeue it. */ if (sigismember(¤t->blocked, signr) || fatal_signal_pending(current)) { - send_signal(signr, info, current, type); + send_signal_locked(signr, info, current, type); signr = 0; } @@ -4807,7 +4789,7 @@ void kdb_send_sig(struct task_struct *t, int sig) "the deadlock.\n"); return; } - ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); + ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); spin_unlock(&t->sighand->siglock); if (ret) kdb_printf("Fail to deliver Signal %d to process %d.\n", diff --git a/kernel/taskstats.c b/kernel/taskstats.c index bcac5a9043aa..f7e246336218 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/tsacct_kern.h> +#include <linux/acct.h> #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> @@ -153,6 +154,23 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } +static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{ + /* No idea if I'm allowed to access that here, now. */ + struct file *exe_file = get_task_exe_file(tsk); + + if (exe_file) { + /* Following cp_new_stat64() in stat.c . 
*/ + stats->ac_exe_dev = + huge_encode_dev(exe_file->f_inode->i_sb->s_dev); + stats->ac_exe_inode = exe_file->f_inode->i_ino; + fput(exe_file); + } else { + stats->ac_exe_dev = 0; + stats->ac_exe_inode = 0; + } +} + static void fill_stats(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct task_struct *tsk, struct taskstats *stats) @@ -175,6 +193,9 @@ static void fill_stats(struct user_namespace *user_ns, /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); + + /* add executable info */ + exe_add_tsk(stats, tsk); } static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) @@ -620,6 +641,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) goto err; fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); + if (group_dead) + stats->ac_flag |= AGROUP; /* * Doesn't matter if tsk is the leader or the last group member leaving @@ -665,6 +688,7 @@ static struct genl_family family __ro_after_init = { .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), + .netnsok = true, }; /* Needed early in initialization */ diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0a97193984db..cb925e8ef9a8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -870,7 +870,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { tsk->dl.dl_overrun = 0; - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } } @@ -884,7 +884,7 @@ static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) rt ? "RT" : "CPU", hard ? "hard" : "soft", current->comm, task_pid_nr(current)); } - __group_send_sig_info(signo, SEND_SIG_PRIV, current); + send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); return true; } @@ -958,7 +958,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, task_tgid(tsk), cur_time); - __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } if (it->expires && it->expires < *expires) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d77cd8032213..0d261774d6f3 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -31,6 +31,10 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE GCOV_PROFILE := y endif +# Functions in this file could be invoked from early interrupt +# code and produce random code coverage. +KCOV_INSTRUMENT_trace_preemptirq.o := n + CFLAGS_bpf_trace.o := -I$(src) CFLAGS_trace_benchmark.o := -I$(src) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 3fd5284f6487..218cd95bf8e4 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -30,6 +30,7 @@ int ftrace_graph_active; /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; +#ifdef CONFIG_DYNAMIC_FTRACE /* * archs can override this function if they must do something * to enable hook for graph tracer. 
@@ -47,6 +48,7 @@ int __weak ftrace_disable_ftrace_graph_caller(void) { return 0; } +#endif /** * ftrace_graph_stop - set to permanently disable function graph tracing diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f8db59c3a601..e750fe141a60 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -45,6 +45,8 @@ #include "trace_output.h" #include "trace_stat.h" +#define FTRACE_INVALID_FUNCTION "__ftrace_invalid_address__" + #define FTRACE_WARN_ON(cond) \ ({ \ int ___r = cond; \ @@ -119,7 +121,7 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; struct ftrace_ops global_ops; -/* Defined by vmlinux.lds.h see the commment above arch_ftrace_ops_list_func for details */ +/* Defined by vmlinux.lds.h see the comment above arch_ftrace_ops_list_func for details */ void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -952,7 +954,6 @@ static struct tracer_stat function_stats __initdata = { static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { struct ftrace_profile_stat *stat; - struct dentry *entry; char *name; int ret; int cpu; @@ -983,11 +984,9 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer) } } - entry = tracefs_create_file("function_profile_enabled", - TRACE_MODE_WRITE, d_tracer, NULL, - &ftrace_profile_fops); - if (!entry) - pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); + trace_create_file("function_profile_enabled", + TRACE_MODE_WRITE, d_tracer, NULL, + &ftrace_profile_fops); } #else /* CONFIG_FUNCTION_PROFILER */ @@ -2707,18 +2706,16 @@ ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec) * archs can override this function if they must do something * before the modifying code is performed. */ -int __weak ftrace_arch_code_modify_prepare(void) +void __weak ftrace_arch_code_modify_prepare(void) { - return 0; } /* * archs can override this function if they must do something * after the modifying code is performed. */ -int __weak ftrace_arch_code_modify_post_process(void) +void __weak ftrace_arch_code_modify_post_process(void) { - return 0; } void ftrace_modify_all_code(int command) @@ -2804,12 +2801,7 @@ void __weak arch_ftrace_update_code(int command) static void ftrace_run_update_code(int command) { - int ret; - - ret = ftrace_arch_code_modify_prepare(); - FTRACE_WARN_ON(ret); - if (ret) - return; + ftrace_arch_code_modify_prepare(); /* * By default we use stop_machine() to modify the code. @@ -2819,8 +2811,7 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); - ret = ftrace_arch_code_modify_post_process(); - FTRACE_WARN_ON(ret); + ftrace_arch_code_modify_post_process(); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, @@ -3631,6 +3622,105 @@ static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, seq_printf(m, " ->%pS", ptr); } +#ifdef FTRACE_MCOUNT_MAX_OFFSET +/* + * Weak functions can still have an mcount/fentry that is saved in + * the __mcount_loc section. These can be detected by having a + * symbol offset of greater than FTRACE_MCOUNT_MAX_OFFSET, as the + * symbol found by kallsyms is not the function that the mcount/fentry + * is part of. The offset is much greater in these cases. + * + * Test the record to make sure that the ip points to a valid kallsyms + * and if not, mark it disabled. 
+ */ +static int test_for_valid_rec(struct dyn_ftrace *rec) +{ + char str[KSYM_SYMBOL_LEN]; + unsigned long offset; + const char *ret; + + ret = kallsyms_lookup(rec->ip, NULL, &offset, NULL, str); + + /* Weak functions can cause invalid addresses */ + if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) { + rec->flags |= FTRACE_FL_DISABLED; + return 0; + } + return 1; +} + +static struct workqueue_struct *ftrace_check_wq __initdata; +static struct work_struct ftrace_check_work __initdata; + +/* + * Scan all the mcount/fentry entries to make sure they are valid. + */ +static __init void ftrace_check_work_func(struct work_struct *work) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + test_for_valid_rec(rec); + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static int __init ftrace_check_for_weak_functions(void) +{ + INIT_WORK(&ftrace_check_work, ftrace_check_work_func); + + ftrace_check_wq = alloc_workqueue("ftrace_check_wq", WQ_UNBOUND, 0); + + queue_work(ftrace_check_wq, &ftrace_check_work); + return 0; +} + +static int __init ftrace_check_sync(void) +{ + /* Make sure the ftrace_check updates are finished */ + if (ftrace_check_wq) + destroy_workqueue(ftrace_check_wq); + return 0; +} + +late_initcall_sync(ftrace_check_sync); +subsys_initcall(ftrace_check_for_weak_functions); + +static int print_rec(struct seq_file *m, unsigned long ip) +{ + unsigned long offset; + char str[KSYM_SYMBOL_LEN]; + char *modname; + const char *ret; + + ret = kallsyms_lookup(ip, NULL, &offset, &modname, str); + /* Weak functions can cause invalid addresses */ + if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) { + snprintf(str, KSYM_SYMBOL_LEN, "%s_%ld", + FTRACE_INVALID_FUNCTION, offset); + ret = NULL; + } + + seq_puts(m, str); + if (modname) + seq_printf(m, " [%s]", modname); + return ret == NULL ? 
-1 : 0; +} +#else +static inline int test_for_valid_rec(struct dyn_ftrace *rec) +{ + return 1; +} + +static inline int print_rec(struct seq_file *m, unsigned long ip) +{ + seq_printf(m, "%ps", (void *)ip); + return 0; +} +#endif + static int t_show(struct seq_file *m, void *v) { struct ftrace_iterator *iter = m->private; @@ -3655,7 +3745,13 @@ static int t_show(struct seq_file *m, void *v) if (!rec) return 0; - seq_printf(m, "%ps", (void *)rec->ip); + if (print_rec(m, rec->ip)) { + /* This should only happen when a rec is disabled */ + WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED)); + seq_putc(m, '\n'); + return 0; + } + if (iter->flags & FTRACE_ITER_ENABLED) { struct ftrace_ops *ops; @@ -3973,6 +4069,24 @@ add_rec_by_index(struct ftrace_hash *hash, struct ftrace_glob *func_g, return 0; } +#ifdef FTRACE_MCOUNT_MAX_OFFSET +static int lookup_ip(unsigned long ip, char **modname, char *str) +{ + unsigned long offset; + + kallsyms_lookup(ip, NULL, &offset, modname, str); + if (offset > FTRACE_MCOUNT_MAX_OFFSET) + return -1; + return 0; +} +#else +static int lookup_ip(unsigned long ip, char **modname, char *str) +{ + kallsyms_lookup(ip, NULL, NULL, modname, str); + return 0; +} +#endif + static int ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, struct ftrace_glob *mod_g, int exclude_mod) @@ -3980,7 +4094,12 @@ ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, char str[KSYM_SYMBOL_LEN]; char *modname; - kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); + if (lookup_ip(rec->ip, &modname, str)) { + /* This should only happen when a rec is disabled */ + WARN_ON_ONCE(system_state == SYSTEM_RUNNING && + !(rec->flags & FTRACE_FL_DISABLED)); + return 0; + } if (mod_g) { int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0; @@ -4431,7 +4550,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, * @ip: The instruction pointer address to remove the data from * * Returns the data if it is found, otherwise NULL. - * Note, if the data pointer is used as the data itself, (see + * Note, if the data pointer is used as the data itself, (see * ftrace_func_mapper_find_ip(), then the return value may be meaningless, * if the data pointer was set to zero. 
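Several hunks that follow (register/unregister_ftrace_function_probe(), disable_eprobe(), subsystem_open(), event_subsystem_dir(), and the trigger unregister paths) apply the same list-iterator cleanup: rather than detecting "no match" by comparing the iterator against the list head after the loop, they walk with a scratch iterator and keep a separate NULL-initialized result pointer. A minimal kernel-context sketch of that pattern, with illustrative names:

#include <linux/list.h>

struct item {
	struct list_head list;
	int key;
};

/*
 * If the loop finishes without a match, "iter" no longer points at a real
 * entry, so only the explicitly assigned "found" pointer is trusted.
 */
static struct item *find_item(struct list_head *head, int key)
{
	struct item *found = NULL, *iter;

	list_for_each_entry(iter, head, list) {
		if (iter->key == key) {
			found = iter;	/* remember the match */
			break;
		}
	}
	return found;			/* NULL when nothing matched */
}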
*/ @@ -4526,8 +4645,8 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *probe_ops, void *data) { + struct ftrace_func_probe *probe = NULL, *iter; struct ftrace_func_entry *entry; - struct ftrace_func_probe *probe; struct ftrace_hash **orig_hash; struct ftrace_hash *old_hash; struct ftrace_hash *hash; @@ -4546,11 +4665,13 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, mutex_lock(&ftrace_lock); /* Check if the probe_ops is already registered */ - list_for_each_entry(probe, &tr->func_probes, list) { - if (probe->probe_ops == probe_ops) + list_for_each_entry(iter, &tr->func_probes, list) { + if (iter->probe_ops == probe_ops) { + probe = iter; break; + } } - if (&probe->list == &tr->func_probes) { + if (!probe) { probe = kzalloc(sizeof(*probe), GFP_KERNEL); if (!probe) { mutex_unlock(&ftrace_lock); @@ -4668,9 +4789,9 @@ int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *probe_ops) { + struct ftrace_func_probe *probe = NULL, *iter; struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *entry; - struct ftrace_func_probe *probe; struct ftrace_glob func_g; struct ftrace_hash **orig_hash; struct ftrace_hash *old_hash; @@ -4698,11 +4819,13 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, mutex_lock(&ftrace_lock); /* Check if the probe_ops is already registered */ - list_for_each_entry(probe, &tr->func_probes, list) { - if (probe->probe_ops == probe_ops) + list_for_each_entry(iter, &tr->func_probes, list) { + if (iter->probe_ops == probe_ops) { + probe = iter; break; + } } - if (&probe->list == &tr->func_probes) + if (!probe) goto err_unlock_ftrace; ret = -EINVAL; @@ -5161,8 +5284,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) goto out_unlock; ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); - if (ret) - remove_hash_entry(direct_functions, entry); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { ret = register_ftrace_function(&direct_ops); @@ -5171,6 +5292,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } if (ret) { + remove_hash_entry(direct_functions, entry); kfree(entry); if (!direct->count) { list_del_rcu(&direct->next); @@ -6793,6 +6915,13 @@ void ftrace_module_enable(struct module *mod) !within_module_init(rec->ip, mod)) break; + /* Weak functions should still be ignored */ + if (!test_for_valid_rec(rec)) { + /* Clear all other flags. 
Should not be enabled anyway */ + rec->flags = FTRACE_FL_DISABLED; + continue; + } + cnt = 0; /* @@ -6829,11 +6958,16 @@ void ftrace_module_enable(struct module *mod) void ftrace_module_init(struct module *mod) { + int ret; + if (ftrace_disabled || !mod->num_ftrace_callsites) return; - ftrace_process_locs(mod, mod->ftrace_callsites, - mod->ftrace_callsites + mod->num_ftrace_callsites); + ret = ftrace_process_locs(mod, mod->ftrace_callsites, + mod->ftrace_callsites + mod->num_ftrace_callsites); + if (ret) + pr_warn("ftrace: failed to allocate entries for module '%s' functions\n", + mod->name); } static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, @@ -7166,15 +7300,19 @@ void __init ftrace_init(void) pr_info("ftrace: allocating %ld entries in %ld pages\n", count, count / ENTRIES_PER_PAGE + 1); - last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); + if (ret) { + pr_warn("ftrace: failed to allocate entries for functions\n"); + goto failed; + } pr_info("ftrace: allocated %ld pages with %ld groups\n", ftrace_number_of_pages, ftrace_number_of_groups); + last_ftrace_enabled = ftrace_enabled = 1; + set_ftrace_early_filters(); return; diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c index a2ef1d18126a..95106d02b32d 100644 --- a/kernel/trace/pid_list.c +++ b/kernel/trace/pid_list.c @@ -118,9 +118,9 @@ static inline unsigned int pid_join(unsigned int upper1, /** * trace_pid_list_is_set - test if the pid is set in the list * @pid_list: The pid list to test - * @pid: The pid to to see if set in the list. + * @pid: The pid to see if set in the list. * - * Tests if @pid is is set in the @pid_list. This is usually called + * Tests if @pid is set in the @pid_list. This is usually called * from the scheduler when a task is scheduled. Its pid is checked * if it should be traced or not. * diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 05dfc7a12d3d..d59b6a328b7f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -29,6 +29,14 @@ #include <asm/local.h> +/* + * The "absolute" timestamp in the buffer is only 59 bits. + * If a clock has the 5 MSBs set, it needs to be saved and + * reinserted. + */ +#define TS_MSB (0xf8ULL << 56) +#define ABS_TS_MASK (~TS_MSB) + static void update_pages_handler(struct work_struct *work); /* @@ -468,6 +476,7 @@ struct rb_time_struct { local_t cnt; local_t top; local_t bottom; + local_t msb; }; #else #include <asm/local64.h> @@ -569,7 +578,6 @@ struct ring_buffer_iter { * For the ring buffer, 64 bit required operations for the time is * the following: * - * - Only need 59 bits (uses 60 to make it even). * - Reads may fail if it interrupted a modification of the time stamp. * It will succeed if it did not interrupt another write even if * the read itself is interrupted by a write. 
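The ring_buffer.c changes here add a fourth word, msb, to rb_time_struct, and the rb_time_split()/rb_time_val() updates that follow carve a full 64-bit timestamp as bits 0-29 into bottom, bits 30-59 into top, and bits 60-63 into the new msb word, so clocks that use the high bits no longer lose them. A self-contained sketch of just that carve-up (it ignores the per-word sequence-counter bits that rb_time_val_cnt() mixes in):

#include <stdint.h>
#include <stdio.h>

#define RB_TIME_SHIFT		30
#define RB_TIME_VAL_MASK	((1UL << RB_TIME_SHIFT) - 1)
#define RB_TIME_MSB_SHIFT	60

/* split a 64-bit timestamp into three words that each fit in 32 bits */
static void split_ts(uint64_t val, unsigned long *top, unsigned long *bottom,
		     unsigned long *msb)
{
	*top    = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK);
	*bottom = (unsigned long)(val & RB_TIME_VAL_MASK);
	*msb    = (unsigned long)(val >> RB_TIME_MSB_SHIFT);
}

/* reassemble; with the msb word no high bits are dropped */
static uint64_t join_ts(unsigned long top, unsigned long bottom,
			unsigned long msb)
{
	return ((uint64_t)top << RB_TIME_SHIFT) | bottom |
	       ((uint64_t)msb << RB_TIME_MSB_SHIFT);
}

int main(void)
{
	uint64_t ts = 0xfedcba9876543210ULL;	/* a clock using all 64 bits */
	unsigned long top, bottom, msb;

	split_ts(ts, &top, &bottom, &msb);
	printf("round-trip ok: %d\n", join_ts(top, bottom, msb) == ts);
	return 0;
}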
@@ -594,6 +602,7 @@ struct ring_buffer_iter { */ #define RB_TIME_SHIFT 30 #define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1) +#define RB_TIME_MSB_SHIFT 60 static inline int rb_time_cnt(unsigned long val) { @@ -613,7 +622,7 @@ static inline u64 rb_time_val(unsigned long top, unsigned long bottom) static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) { - unsigned long top, bottom; + unsigned long top, bottom, msb; unsigned long c; /* @@ -625,6 +634,7 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) c = local_read(&t->cnt); top = local_read(&t->top); bottom = local_read(&t->bottom); + msb = local_read(&t->msb); } while (c != local_read(&t->cnt)); *cnt = rb_time_cnt(top); @@ -633,7 +643,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) if (*cnt != rb_time_cnt(bottom)) return false; - *ret = rb_time_val(top, bottom); + /* The shift to msb will lose its cnt bits */ + *ret = rb_time_val(top, bottom) | ((u64)msb << RB_TIME_MSB_SHIFT); return true; } @@ -649,10 +660,12 @@ static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT); } -static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom) +static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom, + unsigned long *msb) { *top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK); *bottom = (unsigned long)(val & RB_TIME_VAL_MASK); + *msb = (unsigned long)(val >> RB_TIME_MSB_SHIFT); } static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt) @@ -663,15 +676,16 @@ static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long static void rb_time_set(rb_time_t *t, u64 val) { - unsigned long cnt, top, bottom; + unsigned long cnt, top, bottom, msb; - rb_time_split(val, &top, &bottom); + rb_time_split(val, &top, &bottom, &msb); /* Writes always succeed with a valid number even if it gets interrupted. */ do { cnt = local_inc_return(&t->cnt); rb_time_val_set(&t->top, top, cnt); rb_time_val_set(&t->bottom, bottom, cnt); + rb_time_val_set(&t->msb, val >> RB_TIME_MSB_SHIFT, cnt); } while (cnt != local_read(&t->cnt)); } @@ -686,8 +700,8 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) { - unsigned long cnt, top, bottom; - unsigned long cnt2, top2, bottom2; + unsigned long cnt, top, bottom, msb; + unsigned long cnt2, top2, bottom2, msb2; u64 val; /* The cmpxchg always fails if it interrupted an update */ @@ -703,16 +717,18 @@ static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) cnt2 = cnt + 1; - rb_time_split(val, &top, &bottom); + rb_time_split(val, &top, &bottom, &msb); top = rb_time_val_cnt(top, cnt); bottom = rb_time_val_cnt(bottom, cnt); - rb_time_split(set, &top2, &bottom2); + rb_time_split(set, &top2, &bottom2, &msb2); top2 = rb_time_val_cnt(top2, cnt2); bottom2 = rb_time_val_cnt(bottom2, cnt2); if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2)) return false; + if (!rb_time_read_cmpxchg(&t->msb, msb, msb2)) + return false; if (!rb_time_read_cmpxchg(&t->top, top, top2)) return false; if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2)) @@ -783,6 +799,24 @@ static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, } #endif +/* + * The absolute time stamp drops the 5 MSBs and some clocks may + * require them. 
The rb_fix_abs_ts() will take a previous full + * time stamp, and add the 5 MSB of that time stamp on to the + * saved absolute time stamp. Then they are compared in case of + * the unlikely event that the latest time stamp incremented + * the 5 MSB. + */ +static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts) +{ + if (save_ts & TS_MSB) { + abs |= save_ts & TS_MSB; + /* Check for overflow */ + if (unlikely(abs < save_ts)) + abs += 1ULL << 59; + } + return abs; +} static inline u64 rb_time_stamp(struct trace_buffer *buffer); @@ -811,8 +845,10 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, u64 ts; /* If the event includes an absolute time, then just use that */ - if (event->type_len == RINGBUF_TYPE_TIME_STAMP) - return rb_event_time_stamp(event); + if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { + ts = rb_event_time_stamp(event); + return rb_fix_abs_ts(ts, cpu_buffer->tail_page->page->time_stamp); + } nest = local_read(&cpu_buffer->committing); verify_event(cpu_buffer, event); @@ -2754,8 +2790,15 @@ static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE); if (unlikely(info->delta > (1ULL << 59))) { + /* + * Some timers can use more than 59 bits, and when a timestamp + * is added to the buffer, it will lose those bits. + */ + if (abs && (info->ts & TS_MSB)) { + info->delta &= ABS_TS_MASK; + /* did the clock go backwards */ - if (info->before == info->after && info->before > info->ts) { + } else if (info->before == info->after && info->before > info->ts) { /* not interrupted */ static int once; @@ -3304,7 +3347,7 @@ static void dump_buffer_page(struct buffer_data_page *bpage, case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); - ts = delta; + ts = rb_fix_abs_ts(delta, ts); pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta); break; @@ -3380,7 +3423,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); - ts = delta; + ts = rb_fix_abs_ts(delta, ts); break; case RINGBUF_TYPE_PADDING: @@ -4367,6 +4410,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); + delta = rb_fix_abs_ts(delta, cpu_buffer->read_stamp); cpu_buffer->read_stamp = delta; return; @@ -4397,6 +4441,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); + delta = rb_fix_abs_ts(delta, iter->read_stamp); iter->read_stamp = delta; return; @@ -4650,6 +4695,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, case RINGBUF_TYPE_TIME_STAMP: if (ts) { *ts = rb_event_time_stamp(event); + *ts = rb_fix_abs_ts(*ts, reader->page->time_stamp); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } @@ -4741,6 +4787,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) case RINGBUF_TYPE_TIME_STAMP: if (ts) { *ts = rb_event_time_stamp(event); + *ts = rb_fix_abs_ts(*ts, iter->head_page->page->time_stamp); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } @@ -6011,10 +6058,10 @@ static __init int test_ringbuffer(void) pr_info(" total events: %ld\n", total_lost + total_read); pr_info(" recorded len bytes: %ld\n", total_len); pr_info(" recorded size bytes: %ld\n", total_size); - if (total_lost) + if (total_lost) { pr_info(" With dropped events, record len and size may not match\n" " alloced and written from above\n"); - if (!total_lost) { + } else { if 
(RB_WARN_ON(buffer, total_len != total_alloc || total_size != total_written)) break; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 124f1897fd56..2c95992e2c71 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -721,13 +721,16 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pos = 0; ret = trace_get_user(&parser, ubuf, cnt, &pos); - if (ret < 0 || !trace_parser_loaded(&parser)) + if (ret < 0) break; read += ret; ubuf += ret; cnt -= ret; + if (!trace_parser_loaded(&parser)) + break; + ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; @@ -753,7 +756,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, if (!nr_pids) { /* Cleared the list of pids */ trace_pid_list_free(pid_list); - read = ret; pid_list = NULL; } @@ -1174,7 +1176,7 @@ void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) EXPORT_SYMBOL_GPL(tracing_snapshot_cond); /** - * tracing_snapshot_cond_data - get the user data associated with a snapshot + * tracing_cond_snapshot_data - get the user data associated with a snapshot * @tr: The tracing instance * * When the user enables a conditional snapshot using @@ -1542,6 +1544,7 @@ static struct { { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, { ktime_get_boot_fast_ns, "boot", 1 }, + { ktime_get_tai_fast_ns, "tai", 1 }, ARCH_TRACE_CLOCKS }; @@ -2835,7 +2838,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -static DEFINE_SPINLOCK(tracepoint_iter_lock); +static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) @@ -2863,14 +2866,14 @@ static void output_printk(struct trace_event_buffer *fbuffer) event = &fbuffer->trace_file->event_call->event; - spin_lock_irqsave(&tracepoint_iter_lock, flags); + raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); iter->ent = fbuffer->entry; event_call->event.funcs->trace(iter, 0, event); trace_seq_putc(&iter->seq, 0); printk("%s", iter->seq.buffer); - spin_unlock_irqrestore(&tracepoint_iter_lock, flags); + raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } int tracepoint_printk_sysctl(struct ctl_table *table, int write, @@ -4249,7 +4252,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; + static const char space[] = " "; int prec = tgid ? 
12 : 2; print_event_info(buf, m); @@ -4273,9 +4276,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) struct tracer *type = iter->trace; unsigned long entries; unsigned long total; - const char *name = "preemption"; - - name = type->name; + const char *name = type->name; get_total_entries(buf, &total, &entries); @@ -5469,7 +5470,7 @@ static const char readme_msg[] = " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" - " trace_clock\t\t-change the clock used to order events\n" + " trace_clock\t\t- change the clock used to order events\n" " local: Per cpu clock but may not be synced across CPUs\n" " global: Synced across CPUs but slows tracing down.\n" " counter: Not a clock, but just an increment\n" @@ -5478,7 +5479,7 @@ static const char readme_msg[] = #ifdef CONFIG_X86_64 " x86-tsc: TSC cycle counter\n" #endif - "\n timestamp_mode\t-view the mode used to timestamp events\n" + "\n timestamp_mode\t- view the mode used to timestamp events\n" " delta: Delta difference against a buffer-wide timestamp\n" " absolute: Absolute (standalone) timestamp\n" "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" @@ -6326,12 +6327,18 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } +static bool tracer_options_updated; + static void add_tracer_options(struct trace_array *tr, struct tracer *t) { /* Only enable if the directory has been created already. */ if (!tr->dir) return; + /* Only create trace option files after update_tracer_options finish */ + if (!tracer_options_updated) + return; + create_trace_option_files(tr, t); } @@ -6448,7 +6455,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+1]; - int i; + char *name; size_t ret; int err; @@ -6462,11 +6469,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - /* strip ending whitespace. 
*/ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + name = strim(buf); - err = tracing_set_tracer(tr, buf); + err = tracing_set_tracer(tr, name); if (err) return err; @@ -9170,6 +9175,7 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { mutex_lock(&trace_types_lock); + tracer_options_updated = true; __update_tracer_options(tr); mutex_unlock(&trace_types_lock); } @@ -9602,6 +9608,7 @@ extern struct trace_eval_map *__stop_ftrace_eval_maps[]; static struct workqueue_struct *eval_map_wq __initdata; static struct work_struct eval_map_work __initdata; +static struct work_struct tracerfs_init_work __initdata; static void __init eval_map_work_func(struct work_struct *work) { @@ -9627,6 +9634,8 @@ static int __init trace_eval_init(void) return 0; } +subsys_initcall(trace_eval_init); + static int __init trace_eval_sync(void) { /* Make sure the eval map updates are finished */ @@ -9709,15 +9718,8 @@ static struct notifier_block trace_module_nb = { }; #endif /* CONFIG_MODULES */ -static __init int tracer_init_tracefs(void) +static __init void tracer_init_tracefs_work_func(struct work_struct *work) { - int ret; - - trace_access_lock_init(); - - ret = tracing_init_dentry(); - if (ret) - return 0; event_trace_init(); @@ -9739,8 +9741,6 @@ static __init int tracer_init_tracefs(void) trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, NULL, &tracing_saved_tgids_fops); - trace_eval_init(); - trace_create_eval_file(NULL); #ifdef CONFIG_MODULES @@ -9755,6 +9755,24 @@ static __init int tracer_init_tracefs(void) create_trace_instances(NULL); update_tracer_options(&global_trace); +} + +static __init int tracer_init_tracefs(void) +{ + int ret; + + trace_access_lock_init(); + + ret = tracing_init_dentry(); + if (ret) + return 0; + + if (eval_map_wq) { + INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); + queue_work(eval_map_wq, &tracerfs_init_work); + } else { + tracer_init_tracefs_work_func(NULL); + } return 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 07d990270e2a..ff816fb41e48 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1573,13 +1573,12 @@ struct enable_trigger_data { }; extern int event_enable_trigger_print(struct seq_file *m, - struct event_trigger_ops *ops, - struct event_trigger_data *data); -extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); +extern void event_enable_trigger_free(struct event_trigger_data *data); extern int event_enable_trigger_parse(struct event_command *cmd_ops, struct trace_event_file *file, - char *glob, char *cmd, char *param); + char *glob, char *cmd, + char *param_and_filter); extern int event_enable_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file); @@ -1587,8 +1586,7 @@ extern void event_enable_unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); -extern int event_trigger_init(struct event_trigger_ops *ops, - struct event_trigger_data *data); +extern int event_trigger_init(struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable); extern void update_cond_flag(struct trace_event_file *file); @@ -1629,10 +1627,11 @@ extern void event_trigger_reset_filter(struct event_command *cmd_ops, extern int event_trigger_register(struct event_command *cmd_ops, struct 
trace_event_file *file, char *glob, - char *cmd, - char *trigger, - struct event_trigger_data *trigger_data, - int *n_registered); + struct event_trigger_data *trigger_data); +extern void event_trigger_unregister(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + struct event_trigger_data *trigger_data); /** * struct event_trigger_ops - callbacks for trace event triggers @@ -1686,12 +1685,9 @@ struct event_trigger_ops { struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe); - int (*init)(struct event_trigger_ops *ops, - struct event_trigger_data *data); - void (*free)(struct event_trigger_ops *ops, - struct event_trigger_data *data); + int (*init)(struct event_trigger_data *data); + void (*free)(struct event_trigger_data *data); int (*print)(struct seq_file *m, - struct event_trigger_ops *ops, struct event_trigger_data *data); }; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 0580287d7a0d..778200dd8ede 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -300,7 +300,7 @@ trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp, { struct xbc_node *node; const char *p, *handler; - int ret; + int ret = 0; handler = xbc_node_get_data(hnode); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index e34e8182ee4b..076b447a1b88 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -255,19 +255,14 @@ static const struct file_operations dynamic_events_ops = { /* Make a tracefs interface for controlling dynamic events */ static __init int init_dynamic_event(void) { - struct dentry *entry; int ret; ret = tracing_init_dentry(); if (ret) return 0; - entry = tracefs_create_file("dynamic_events", TRACE_MODE_WRITE, NULL, - NULL, &dynamic_events_ops); - - /* Event list interface */ - if (!entry) - pr_warn("Could not create tracefs 'dynamic_events' entry\n"); + trace_create_file("dynamic_events", TRACE_MODE_WRITE, NULL, + NULL, &dynamic_events_ops); return 0; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 541aa13581b9..7d4478525c66 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -511,20 +511,17 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) * functions are just stubs to fulfill what is needed to use the trigger * infrastructure. 
*/ -static int eprobe_trigger_init(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static int eprobe_trigger_init(struct event_trigger_data *data) { return 0; } -static void eprobe_trigger_free(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static void eprobe_trigger_free(struct event_trigger_data *data) { } static int eprobe_trigger_print(struct seq_file *m, - struct event_trigger_ops *ops, struct event_trigger_data *data) { /* Do not print eprobe event triggers */ @@ -549,7 +546,8 @@ static struct event_trigger_ops eprobe_trigger_ops = { static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops, struct trace_event_file *file, - char *glob, char *cmd, char *param) + char *glob, char *cmd, + char *param_and_filter) { return -1; } @@ -650,7 +648,7 @@ static struct trace_event_functions eprobe_funcs = { static int disable_eprobe(struct trace_eprobe *ep, struct trace_array *tr) { - struct event_trigger_data *trigger; + struct event_trigger_data *trigger = NULL, *iter; struct trace_event_file *file; struct eprobe_data *edata; @@ -658,14 +656,16 @@ static int disable_eprobe(struct trace_eprobe *ep, if (!file) return -ENOENT; - list_for_each_entry(trigger, &file->triggers, list) { - if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE)) + list_for_each_entry(iter, &file->triggers, list) { + if (!(iter->flags & EVENT_TRIGGER_FL_PROBE)) continue; - edata = trigger->private_data; - if (edata->ep == ep) + edata = iter->private_data; + if (edata->ep == ep) { + trigger = iter; break; + } } - if (list_entry_is_head(trigger, &file->triggers, list)) + if (!trigger) return -ENODEV; list_del_rcu(&trigger->list); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f97de82d1342..181f08186d32 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -392,12 +392,6 @@ static void test_event_printk(struct trace_event_call *call) if (!(dereference_flags & (1ULL << arg))) goto next_arg; - /* Check for __get_sockaddr */; - if (str_has_prefix(fmt + i, "__get_sockaddr(")) { - dereference_flags &= ~(1ULL << arg); - goto next_arg; - } - /* Find the REC-> in the argument */ c = strchr(fmt + i, ','); r = strstr(fmt + i, "REC->"); @@ -413,7 +407,14 @@ static void test_event_printk(struct trace_event_call *call) a = strchr(fmt + i, '&'); if ((a && (a < r)) || test_field(r, call)) dereference_flags &= ~(1ULL << arg); + } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && + (!c || r < c)) { + dereference_flags &= ~(1ULL << arg); + } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && + (!c || r < c)) { + dereference_flags &= ~(1ULL << arg); } + next_arg: i--; arg++; @@ -1723,9 +1724,9 @@ static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { + struct trace_subsystem_dir *dir = NULL, *iter_dir; + struct trace_array *tr = NULL, *iter_tr; struct event_subsystem *system = NULL; - struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */ - struct trace_array *tr; int ret; if (tracing_is_disabled()) @@ -1734,10 +1735,12 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Make sure the system still exists */ mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - list_for_each_entry(dir, &tr->systems, list) { - if (dir == inode->i_private) { + list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { + list_for_each_entry(iter_dir, &iter_tr->systems, list) { + if (iter_dir == inode->i_private) { /* Don't 
open systems with no events */ + tr = iter_tr; + dir = iter_dir; if (dir->nr_events) { __get_system_dir(dir); system = dir->subsystem; @@ -1753,9 +1756,6 @@ static int subsystem_open(struct inode *inode, struct file *filp) if (!system) return -ENODEV; - /* Some versions of gcc think dir can be uninitialized here */ - WARN_ON(!dir); - /* Still need to increment the ref count of the system */ if (trace_array_get(tr) < 0) { put_system(dir); @@ -2280,8 +2280,8 @@ static struct dentry * event_subsystem_dir(struct trace_array *tr, const char *name, struct trace_event_file *file, struct dentry *parent) { + struct event_subsystem *system, *iter; struct trace_subsystem_dir *dir; - struct event_subsystem *system; struct dentry *entry; /* First see if we did not already create this dir */ @@ -2295,13 +2295,13 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } /* Now see if the system itself exists. */ - list_for_each_entry(system, &event_subsystems, list) { - if (strcmp(system->name, name) == 0) + system = NULL; + list_for_each_entry(iter, &event_subsystems, list) { + if (strcmp(iter->name, name) == 0) { + system = iter; break; + } } - /* Reset system variable when not found */ - if (&system->list == &event_subsystems) - system = NULL; dir = kmalloc(sizeof(*dir), GFP_KERNEL); if (!dir) @@ -3546,12 +3546,10 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) struct dentry *d_events; struct dentry *entry; - entry = tracefs_create_file("set_event", TRACE_MODE_WRITE, parent, - tr, &ftrace_set_event_fops); - if (!entry) { - pr_warn("Could not create tracefs 'set_event' entry\n"); + entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_fops); + if (!entry) return -ENOMEM; - } d_events = tracefs_create_dir("events", parent); if (!d_events) { @@ -3566,16 +3564,12 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) /* There are not as crucial, just warn if they are not created */ - entry = tracefs_create_file("set_event_pid", TRACE_MODE_WRITE, parent, - tr, &ftrace_set_event_pid_fops); - if (!entry) - pr_warn("Could not create tracefs 'set_event_pid' entry\n"); + trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_pid_fops); - entry = tracefs_create_file("set_event_notrace_pid", - TRACE_MODE_WRITE, parent, tr, - &ftrace_set_event_notrace_pid_fops); - if (!entry) - pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); + trace_create_file("set_event_notrace_pid", + TRACE_MODE_WRITE, parent, tr, + &ftrace_set_event_notrace_pid_fops); /* ring buffer internal formats */ trace_create_file("header_page", TRACE_MODE_READ, d_events, @@ -3790,17 +3784,14 @@ static __init int event_trace_init_fields(void) __init int event_trace_init(void) { struct trace_array *tr; - struct dentry *entry; int ret; tr = top_trace_array(); if (!tr) return -ENODEV; - entry = tracefs_create_file("available_events", TRACE_MODE_READ, - NULL, tr, &ftrace_avail_fops); - if (!entry) - pr_warn("Could not create tracefs 'available_events' entry\n"); + trace_create_file("available_events", TRACE_MODE_READ, + NULL, tr, &ftrace_avail_fops); ret = early_event_add_tracer(NULL, tr); if (ret) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index b458a9afa2c0..4b1057ab9d96 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1816,7 +1816,7 @@ static void create_filter_finish(struct filter_parse_error *pe) * create_filter - create a 
filter for a trace_event_call * @tr: the trace array associated with these events * @call: trace_event_call to create a filter for - * @filter_str: filter string + * @filter_string: filter string * @set_str: remember @filter_str and enable detailed error in filter * @filterp: out param for created filter (always updated on return) * Must be a pointer that references a NULL pointer. diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 44db5ba9cabb..48e82e141d54 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2093,8 +2093,11 @@ static int init_var_ref(struct hist_field *ref_field, return err; free: kfree(ref_field->system); + ref_field->system = NULL; kfree(ref_field->event_name); + ref_field->event_name = NULL; kfree(ref_field->name); + ref_field->name = NULL; goto out; } @@ -2785,7 +2788,8 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, static struct event_command trigger_hist_cmd; static int event_hist_trigger_parse(struct event_command *cmd_ops, struct trace_event_file *file, - char *glob, char *cmd, char *param); + char *glob, char *cmd, + char *param_and_filter); static bool compatible_keys(struct hist_trigger_data *target_hist_data, struct hist_trigger_data *hist_data, @@ -4161,7 +4165,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0); } -static const char *no_comm = "(no comm)"; +static const char no_comm[] = "(no comm)"; static u64 hist_field_execname(struct hist_field *hist_field, struct tracing_map_elt *elt, @@ -5252,7 +5256,7 @@ static void hist_trigger_show(struct seq_file *m, seq_puts(m, "\n\n"); seq_puts(m, "# event histogram\n#\n# trigger info: "); - data->ops->print(m, data->ops, data); + data->ops->print(m, data); seq_puts(m, "#\n\n"); hist_data = data->private_data; @@ -5484,7 +5488,7 @@ static void hist_trigger_debug_show(struct seq_file *m, seq_puts(m, "\n\n"); seq_puts(m, "# event histogram\n#\n# trigger info: "); - data->ops->print(m, data->ops, data); + data->ops->print(m, data); seq_puts(m, "#\n\n"); hist_data = data->private_data; @@ -5621,7 +5625,6 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) } static int event_hist_trigger_print(struct seq_file *m, - struct event_trigger_ops *ops, struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; @@ -5729,8 +5732,7 @@ static int event_hist_trigger_print(struct seq_file *m, return 0; } -static int event_hist_trigger_init(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static int event_hist_trigger_init(struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; @@ -5758,8 +5760,7 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data) } } -static void event_hist_trigger_free(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static void event_hist_trigger_free(struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; @@ -5788,25 +5789,23 @@ static struct event_trigger_ops event_hist_trigger_ops = { .free = event_hist_trigger_free, }; -static int event_hist_trigger_named_init(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static int event_hist_trigger_named_init(struct event_trigger_data *data) { data->ref++; save_named_trigger(data->named_data->name, data); - event_hist_trigger_init(ops, data->named_data); + 
event_hist_trigger_init(data->named_data); return 0; } -static void event_hist_trigger_named_free(struct event_trigger_ops *ops, - struct event_trigger_data *data) +static void event_hist_trigger_named_free(struct event_trigger_data *data) { if (WARN_ON_ONCE(data->ref <= 0)) return; - event_hist_trigger_free(ops, data->named_data); + event_hist_trigger_free(data->named_data); data->ref--; if (!data->ref) { @@ -5933,6 +5932,48 @@ static bool hist_trigger_match(struct event_trigger_data *data, return true; } +static bool existing_hist_update_only(char *glob, + struct event_trigger_data *data, + struct trace_event_file *file) +{ + struct hist_trigger_data *hist_data = data->private_data; + struct event_trigger_data *test, *named_data = NULL; + bool updated = false; + + if (!hist_data->attrs->pause && !hist_data->attrs->cont && + !hist_data->attrs->clear) + goto out; + + if (hist_data->attrs->name) { + named_data = find_named_trigger(hist_data->attrs->name); + if (named_data) { + if (!hist_trigger_match(data, named_data, named_data, + true)) + goto out; + } + } + + if (hist_data->attrs->name && !named_data) + goto out; + + list_for_each_entry(test, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + if (!hist_trigger_match(data, test, named_data, false)) + continue; + if (hist_data->attrs->pause) + test->paused = true; + else if (hist_data->attrs->cont) + test->paused = false; + else if (hist_data->attrs->clear) + hist_clear(test); + updated = true; + goto out; + } + } + out: + return updated; +} + static int hist_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) @@ -5961,19 +6002,11 @@ static int hist_register_trigger(char *glob, list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { - if (!hist_trigger_match(data, test, named_data, false)) - continue; - if (hist_data->attrs->pause) - test->paused = true; - else if (hist_data->attrs->cont) - test->paused = false; - else if (hist_data->attrs->clear) - hist_clear(test); - else { + if (hist_trigger_match(data, test, named_data, false)) { hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; + goto out; } - goto out; } } new: @@ -5993,7 +6026,7 @@ static int hist_register_trigger(char *glob, } if (data->ops->init) { - ret = data->ops->init(data->ops, data); + ret = data->ops->init(data); if (ret < 0) goto out; } @@ -6012,8 +6045,6 @@ static int hist_register_trigger(char *glob, if (named_data) destroy_hist_data(hist_data); - - ret++; out: return ret; } @@ -6089,20 +6120,19 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { + struct event_trigger_data *test = NULL, *iter, *named_data = NULL; struct hist_trigger_data *hist_data = data->private_data; - struct event_trigger_data *test, *named_data = NULL; - bool unregistered = false; lockdep_assert_held(&event_mutex); if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name); - list_for_each_entry(test, &file->triggers, list) { - if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { - if (!hist_trigger_match(data, test, named_data, false)) + list_for_each_entry(iter, &file->triggers, list) { + if (iter->cmd_ops->trigger_type == ETT_EVENT_HIST) { + if (!hist_trigger_match(data, iter, named_data, false)) continue; - unregistered = true; + test = iter; list_del_rcu(&test->list); trace_event_trigger_enable_disable(file, 0); update_cond_flag(file); @@ -6110,11 +6140,11 @@ static void 
hist_unregister_trigger(char *glob, } } - if (unregistered && test->ops->free) - test->ops->free(test->ops, test); + if (test && test->ops->free) + test->ops->free(test); if (hist_data->enable_timestamps) { - if (!hist_data->remove || unregistered) + if (!hist_data->remove || test) tracing_set_filter_buffering(file->tr, false); } } @@ -6164,57 +6194,57 @@ static void hist_unreg_all(struct trace_event_file *file) if (hist_data->enable_timestamps) tracing_set_filter_buffering(file->tr, false); if (test->ops->free) - test->ops->free(test->ops, test); + test->ops->free(test); } } } static int event_hist_trigger_parse(struct event_command *cmd_ops, struct trace_event_file *file, - char *glob, char *cmd, char *param) + char *glob, char *cmd, + char *param_and_filter) { unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT; struct event_trigger_data *trigger_data; struct hist_trigger_attrs *attrs; - struct event_trigger_ops *trigger_ops; struct hist_trigger_data *hist_data; + char *param, *filter, *p, *start; struct synth_event *se; const char *se_name; - bool remove = false; - char *trigger, *p, *start; + bool remove; int ret = 0; lockdep_assert_held(&event_mutex); - WARN_ON(!glob); + if (WARN_ON(!glob)) + return -EINVAL; - if (strlen(glob)) { + if (glob[0]) { hist_err_clear(); - last_cmd_set(file, param); + last_cmd_set(file, param_and_filter); } - if (!param) - return -EINVAL; + remove = event_trigger_check_remove(glob); - if (glob[0] == '!') - remove = true; + if (event_trigger_empty_param(param_and_filter)) + return -EINVAL; /* * separate the trigger from the filter (k:v [if filter]) * allowing for whitespace in the trigger */ - p = trigger = param; + p = param = param_and_filter; do { p = strstr(p, "if"); if (!p) break; - if (p == param) + if (p == param_and_filter) return -EINVAL; if (*(p - 1) != ' ' && *(p - 1) != '\t') { p++; continue; } - if (p >= param + strlen(param) - (sizeof("if") - 1) - 1) + if (p >= param_and_filter + strlen(param_and_filter) - (sizeof("if") - 1) - 1) return -EINVAL; if (*(p + sizeof("if") - 1) != ' ' && *(p + sizeof("if") - 1) != '\t') { p++; @@ -6224,24 +6254,24 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, } while (1); if (!p) - param = NULL; + filter = NULL; else { *(p - 1) = '\0'; - param = strstrip(p); - trigger = strstrip(trigger); + filter = strstrip(p); + param = strstrip(param); } /* * To simplify arithmetic expression parsing, replace occurrences of * '.sym-offset' modifier with '.symXoffset' */ - start = strstr(trigger, ".sym-offset"); + start = strstr(param, ".sym-offset"); while (start) { *(start + 4) = 'X'; start = strstr(start + 11, ".sym-offset"); } - attrs = parse_hist_trigger_attrs(file->tr, trigger); + attrs = parse_hist_trigger_attrs(file->tr, param); if (IS_ERR(attrs)) return PTR_ERR(attrs); @@ -6254,29 +6284,15 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, return PTR_ERR(hist_data); } - trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); - - trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + trigger_data = event_trigger_alloc(cmd_ops, cmd, param, hist_data); if (!trigger_data) { ret = -ENOMEM; goto out_free; } - trigger_data->count = -1; - trigger_data->ops = trigger_ops; - trigger_data->cmd_ops = cmd_ops; - - INIT_LIST_HEAD(&trigger_data->list); - RCU_INIT_POINTER(trigger_data->filter, NULL); - - trigger_data->private_data = hist_data; - - /* if param is non-empty, it's supposed to be a filter */ - if (param && cmd_ops->set_filter) { - ret = cmd_ops->set_filter(param, 
trigger_data, file); - if (ret < 0) - goto out_free; - } + ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data); + if (ret < 0) + goto out_free; if (remove) { if (!have_hist_trigger_match(trigger_data, file)) @@ -6287,7 +6303,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, goto out_free; } - cmd_ops->unreg(glob+1, trigger_data, file); + event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); se_name = trace_event_name(file->event_call); se = find_synth_event(se_name); if (se) @@ -6296,17 +6312,11 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, goto out_free; } - ret = cmd_ops->reg(glob, trigger_data, file); - /* - * The above returns on success the # of triggers registered, - * but if it didn't register any it returns zero. Consider no - * triggers registered a failure too. - */ - if (!ret) { - if (!(attrs->pause || attrs->cont || attrs->clear)) - ret = -ENOENT; + if (existing_hist_update_only(glob, trigger_data, file)) goto out_free; - } else if (ret < 0) + + ret = event_trigger_register(cmd_ops, file, glob, trigger_data); + if (ret < 0) goto out_free; if (get_named_trigger_data(trigger_data)) @@ -6331,18 +6341,15 @@ enable: se = find_synth_event(se_name); if (se) se->ref++; - /* Just return zero, not the number of registered triggers */ - ret = 0; out: if (ret == 0) hist_err_clear(); return ret; out_unreg: - cmd_ops->unreg(glob+1, trigger_data, file); + event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); out_free: - if (cmd_ops->set_filter) - cmd_ops->set_filter(NULL, trigger_data, NULL); + event_trigger_reset_filter(cmd_ops, trigger_data); remove_hist_vars(hist_data); @@ -6463,7 +6470,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file) update_cond_flag(file); trace_event_trigger_enable_disable(file, 0); if (test->ops->free) - test->ops->free(test->ops, test); + test->ops->free(test); } } } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 7eb9d04f1c2e..cb866c3141af 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -188,7 +188,7 @@ static int trigger_show(struct seq_file *m, void *v) } data = list_entry(v, struct event_trigger_data, list); - data->ops->print(m, data->ops, data); + data->ops->print(m, data); return 0; } @@ -432,7 +432,6 @@ event_trigger_print(const char *name, struct seq_file *m, /** * event_trigger_init - Generic event_trigger_ops @init implementation - * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data * * Common implementation of event trigger initialization. @@ -442,8 +441,7 @@ event_trigger_print(const char *name, struct seq_file *m, * * Return: 0 on success, errno otherwise */ -int event_trigger_init(struct event_trigger_ops *ops, - struct event_trigger_data *data) +int event_trigger_init(struct event_trigger_data *data) { data->ref++; return 0; @@ -451,7 +449,6 @@ int event_trigger_init(struct event_trigger_ops *ops, /** * event_trigger_free - Generic event_trigger_ops @free implementation - * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data * * Common implementation of event trigger de-initialization. @@ -460,8 +457,7 @@ int event_trigger_init(struct event_trigger_ops *ops, * implementations. 
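The trigger parse functions reworked below keep relying on plain reference counting: event_trigger_init() takes a reference before registration so a failing reg() cannot free trigger_data underneath the caller, and event_trigger_free() drops it afterwards, releasing the data only if registration kept no reference of its own. A standalone toy sketch of that hold-across-registration pattern (all names hypothetical):

#include <stdlib.h>

struct obj { int ref; };

static void obj_get(struct obj *o) { o->ref++; }
static void obj_put(struct obj *o) { if (--o->ref == 0) free(o); }

/* hypothetical registration: keeps its own reference on success */
static int do_register(struct obj *o) { obj_get(o); return 0; }
static void do_unregister(struct obj *o) { obj_put(o); }

static int parse_and_register(struct obj *o)
{
	int ret;

	obj_get(o);	/* temporary ref: a failing do_register() cannot free o */
	ret = do_register(o);
	obj_put(o);	/* drop ours; frees o only if registration kept nothing */
	return ret;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o || parse_and_register(o))
		return 1;
	do_unregister(o);	/* later teardown drops the registration's ref */
	return 0;
}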
*/ static void -event_trigger_free(struct event_trigger_ops *ops, - struct event_trigger_data *data) +event_trigger_free(struct event_trigger_data *data) { if (WARN_ON_ONCE(data->ref <= 0)) return; @@ -515,7 +511,7 @@ clear_event_triggers(struct trace_array *tr) trace_event_trigger_enable_disable(file, 0); list_del_rcu(&data->list); if (data->ops->free) - data->ops->free(data->ops, data); + data->ops->free(data); } } } @@ -581,19 +577,18 @@ static int register_trigger(char *glob, } if (data->ops->init) { - ret = data->ops->init(data->ops, data); + ret = data->ops->init(data); if (ret < 0) goto out; } list_add_rcu(&data->list, &file->triggers); - ret++; update_cond_flag(file); - if (trace_event_trigger_enable_disable(file, 1) < 0) { + ret = trace_event_trigger_enable_disable(file, 1); + if (ret < 0) { list_del_rcu(&data->list); update_cond_flag(file); - ret--; } out: return ret; @@ -614,14 +609,13 @@ static void unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file) { - struct event_trigger_data *data; - bool unregistered = false; + struct event_trigger_data *data = NULL, *iter; lockdep_assert_held(&event_mutex); - list_for_each_entry(data, &file->triggers, list) { - if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { - unregistered = true; + list_for_each_entry(iter, &file->triggers, list) { + if (iter->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { + data = iter; list_del_rcu(&data->list); trace_event_trigger_enable_disable(file, 0); update_cond_flag(file); @@ -629,8 +623,8 @@ static void unregister_trigger(char *glob, } } - if (unregistered && data->ops->free) - data->ops->free(data->ops, data); + if (data && data->ops->free) + data->ops->free(data); } /* @@ -744,15 +738,15 @@ bool event_trigger_empty_param(const char *param) /** * event_trigger_separate_filter - separate an event trigger from a filter - * @param: The param string containing trigger and possibly filter - * @trigger: outparam, will be filled with a pointer to the trigger + * @param_and_filter: String containing trigger and possibly filter + * @param: outparam, will be filled with a pointer to the trigger * @filter: outparam, will be filled with a pointer to the filter * @param_required: Specifies whether or not the param string is required * * Given a param string of the form '[trigger] [if filter]', this * function separates the filter from the trigger and returns the - * trigger in *trigger and the filter in *filter. Either the *trigger - * or the *filter may be set to NULL by this function - if not set to + * trigger in @param and the filter in @filter. Either the @param + * or the @filter may be set to NULL by this function - if not set to * NULL, they will contain strings corresponding to the trigger and * filter. * @@ -927,48 +921,37 @@ void event_trigger_reset_filter(struct event_command *cmd_ops, * @cmd_ops: The event_command operations for the trigger * @file: The event file for the trigger's event * @glob: The trigger command string, with optional remove(!) operator - * @cmd: The cmd string - * @param: The param string * @trigger_data: The trigger_data for the trigger - * @n_registered: optional outparam, the number of triggers registered * * Register an event trigger. The @cmd_ops are used to call the - * cmd_ops->reg() function which actually does the registration. The - * cmd_ops->reg() function returns the number of triggers registered, - * which is assigned to n_registered, if n_registered is non-NULL. 
+ * cmd_ops->reg() function which actually does the registration. * * Return: 0 on success, errno otherwise */ int event_trigger_register(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, - char *cmd, - char *param, - struct event_trigger_data *trigger_data, - int *n_registered) + struct event_trigger_data *trigger_data) { - int ret; - - if (n_registered) - *n_registered = 0; - - ret = cmd_ops->reg(glob, trigger_data, file); - /* - * The above returns on success the # of functions enabled, - * but if it didn't find any functions it returns zero. - * Consider no functions a failure too. - */ - if (!ret) { - cmd_ops->unreg(glob, trigger_data, file); - ret = -ENOENT; - } else if (ret > 0) { - if (n_registered) - *n_registered = ret; - /* Just return zero, not the number of enabled functions */ - ret = 0; - } + return cmd_ops->reg(glob, trigger_data, file); +} - return ret; +/** + * event_trigger_unregister - unregister an event trigger + * @cmd_ops: The event_command operations for the trigger + * @file: The event file for the trigger's event + * @glob: The trigger command string, with optional remove(!) operator + * @trigger_data: The trigger_data for the trigger + * + * Unregister an event trigger. The @cmd_ops are used to call the + * cmd_ops->unreg() function which actually does the unregistration. + */ +void event_trigger_unregister(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + struct event_trigger_data *trigger_data) +{ + cmd_ops->unreg(glob, trigger_data, file); } /* @@ -981,7 +964,7 @@ int event_trigger_register(struct event_command *cmd_ops, * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger * @cmd: The cmd portion of the string used to register the trigger - * @param: The params portion of the string used to register the trigger + * @param_and_filter: The param and filter portion of the string used to register the trigger * * Common implementation for event command parsing and trigger * instantiation. 
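The rework below funnels every trigger command through the same helpers — event_trigger_check_remove(), event_trigger_separate_filter(), event_trigger_alloc(), event_trigger_parse_num(), event_trigger_set_filter() and event_trigger_register() — with reg()/unreg() now reporting a plain 0 or -errno instead of a count of registered triggers. The separation step works on a string of the form '[trigger][:count] [if filter]'; writing 'traceoff:3 if common_pid == 1234' to a trigger file should leave the trigger part as 'traceoff:3' and the filter as 'common_pid == 1234'. A standalone toy sketch of that split (the kernel's version also handles tabs, missing filters and similar edge cases):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "traceoff:3 if common_pid == 1234";
	char *param = buf;
	char *filter = strstr(buf, " if ");

	if (filter) {
		*filter = '\0';			/* terminate the trigger part */
		filter += strlen(" if ");	/* rest is the filter */
	}
	printf("param='%s' filter='%s'\n", param, filter ? filter : "(none)");
	return 0;
}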
@@ -994,94 +977,53 @@ int event_trigger_register(struct event_command *cmd_ops, static int event_trigger_parse(struct event_command *cmd_ops, struct trace_event_file *file, - char *glob, char *cmd, char *param) + char *glob, char *cmd, char *param_and_filter) { struct event_trigger_data *trigger_data; - struct event_trigger_ops *trigger_ops; - char *trigger = NULL; - char *number; + char *param, *filter; + bool remove; int ret; - /* separate the trigger from the filter (t:n [if filter]) */ - if (param && isdigit(param[0])) { - trigger = strsep(¶m, " \t"); - if (param) { - param = skip_spaces(param); - if (!*param) - param = NULL; - } - } + remove = event_trigger_check_remove(glob); - trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + ret = event_trigger_separate_filter(param_and_filter, ¶m, &filter, false); + if (ret) + return ret; ret = -ENOMEM; - trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file); if (!trigger_data) goto out; - trigger_data->count = -1; - trigger_data->ops = trigger_ops; - trigger_data->cmd_ops = cmd_ops; - trigger_data->private_data = file; - INIT_LIST_HEAD(&trigger_data->list); - INIT_LIST_HEAD(&trigger_data->named_list); - - if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_data, file); + if (remove) { + event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); kfree(trigger_data); ret = 0; goto out; } - if (trigger) { - number = strsep(&trigger, ":"); - - ret = -EINVAL; - if (!strlen(number)) - goto out_free; - - /* - * We use the callback data field (which is a pointer) - * as our counter. - */ - ret = kstrtoul(number, 0, &trigger_data->count); - if (ret) - goto out_free; - } - - if (!param) /* if param is non-empty, it's supposed to be a filter */ - goto out_reg; - - if (!cmd_ops->set_filter) - goto out_reg; + ret = event_trigger_parse_num(param, trigger_data); + if (ret) + goto out_free; - ret = cmd_ops->set_filter(param, trigger_data, file); + ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data); if (ret < 0) goto out_free; - out_reg: /* Up the trigger_data count to make sure reg doesn't free it on failure */ - event_trigger_init(trigger_ops, trigger_data); - ret = cmd_ops->reg(glob, trigger_data, file); - /* - * The above returns on success the # of functions enabled, - * but if it didn't find any functions it returns zero. - * Consider no functions a failure too. 
-	 */
-	if (!ret) {
-		cmd_ops->unreg(glob, trigger_data, file);
-		ret = -ENOENT;
-	} else if (ret > 0)
-		ret = 0;
+	event_trigger_init(trigger_data);
+
+	ret = event_trigger_register(cmd_ops, file, glob, trigger_data);
+	if (ret)
+		goto out_free;
 	/* Down the counter of trigger_data or free it if not used anymore */
-	event_trigger_free(trigger_ops, trigger_data);
+	event_trigger_free(trigger_data);
  out:
 	return ret;
  out_free:
-	if (cmd_ops->set_filter)
-		cmd_ops->set_filter(NULL, trigger_data, NULL);
+	event_trigger_reset_filter(cmd_ops, trigger_data);
 	kfree(trigger_data);
 	goto out;
 }
@@ -1401,16 +1343,14 @@ traceoff_count_trigger(struct event_trigger_data *data,
 }
 static int
-traceon_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
-		      struct event_trigger_data *data)
+traceon_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 {
 	return event_trigger_print("traceon", m,
				   (void *)data->count, data->filter_str);
 }
 static int
-traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
-		       struct event_trigger_data *data)
+traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 {
 	return event_trigger_print("traceoff", m,
				   (void *)data->count, data->filter_str);
@@ -1521,8 +1461,7 @@ register_snapshot_trigger(char *glob,
 }
 static int
-snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
-		       struct event_trigger_data *data)
+snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 {
 	return event_trigger_print("snapshot", m,
				   (void *)data->count, data->filter_str);
@@ -1617,8 +1556,7 @@ stacktrace_count_trigger(struct event_trigger_data *data,
 }
 static int
-stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
-			 struct event_trigger_data *data)
+stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 {
 	return event_trigger_print("stacktrace", m,
				   (void *)data->count, data->filter_str);
@@ -1708,7 +1646,6 @@ event_enable_count_trigger(struct event_trigger_data *data,
 }
 int event_enable_trigger_print(struct seq_file *m,
-			       struct event_trigger_ops *ops,
			       struct event_trigger_data *data)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
@@ -1733,8 +1670,7 @@ int event_enable_trigger_print(struct seq_file *m,
 	return 0;
 }
-void event_enable_trigger_free(struct event_trigger_ops *ops,
-			       struct event_trigger_data *data)
+void event_enable_trigger_free(struct event_trigger_data *data)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
@@ -1781,39 +1717,33 @@ static struct event_trigger_ops event_disable_count_trigger_ops = {
 int event_enable_trigger_parse(struct event_command *cmd_ops,
			       struct trace_event_file *file,
-			       char *glob, char *cmd, char *param)
+			       char *glob, char *cmd, char *param_and_filter)
 {
 	struct trace_event_file *event_enable_file;
 	struct enable_trigger_data *enable_data;
 	struct event_trigger_data *trigger_data;
-	struct event_trigger_ops *trigger_ops;
 	struct trace_array *tr = file->tr;
+	char *param, *filter;
+	bool enable, remove;
 	const char *system;
 	const char *event;
 	bool hist = false;
-	char *trigger;
-	char *number;
-	bool enable;
 	int ret;
-	if (!param)
-		return -EINVAL;
+	remove = event_trigger_check_remove(glob);
-	/* separate the trigger from the filter (s:e:n [if filter]) */
-	trigger = strsep(&param, " \t");
-	if (!trigger)
+	if (event_trigger_empty_param(param_and_filter))
 		return -EINVAL;
-	if (param) {
-		param = skip_spaces(param);
-		if (!*param)
-			param = NULL;
-	}
-	system = strsep(&trigger, ":");
-	if (!trigger)
+	ret = event_trigger_separate_filter(param_and_filter, &param, &filter, true);
+	if (ret)
+		return ret;
+
+	system = strsep(&param, ":");
+	if (!param)
 		return -EINVAL;
-	event = strsep(&trigger, ":");
+	event = strsep(&param, ":");
 	ret = -EINVAL;
 	event_enable_file = find_event_file(tr, system, event);
@@ -1829,32 +1759,24 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
 #else
 	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
 #endif
-	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
-
 	ret = -ENOMEM;
-	trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
-	if (!trigger_data)
-		goto out;
 	enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL);
-	if (!enable_data) {
-		kfree(trigger_data);
+	if (!enable_data)
 		goto out;
-	}
-
-	trigger_data->count = -1;
-	trigger_data->ops = trigger_ops;
-	trigger_data->cmd_ops = cmd_ops;
-	INIT_LIST_HEAD(&trigger_data->list);
-	RCU_INIT_POINTER(trigger_data->filter, NULL);
 	enable_data->hist = hist;
 	enable_data->enable = enable;
 	enable_data->file = event_enable_file;
-	trigger_data->private_data = enable_data;
-	if (glob[0] == '!') {
-		cmd_ops->unreg(glob+1, trigger_data, file);
+	trigger_data = event_trigger_alloc(cmd_ops, cmd, param, enable_data);
+	if (!trigger_data) {
+		kfree(enable_data);
+		goto out;
+	}
+
+	if (remove) {
+		event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
 		kfree(trigger_data);
 		kfree(enable_data);
 		ret = 0;
@@ -1862,35 +1784,16 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
 	}
 	/* Up the trigger_data count to make sure nothing frees it on failure */
-	event_trigger_init(trigger_ops, trigger_data);
-
-	if (trigger) {
-		number = strsep(&trigger, ":");
-
-		ret = -EINVAL;
-		if (!strlen(number))
-			goto out_free;
-
-		/*
-		 * We use the callback data field (which is a pointer)
-		 * as our counter.
-		 */
-		ret = kstrtoul(number, 0, &trigger_data->count);
-		if (ret)
-			goto out_free;
-	}
+	event_trigger_init(trigger_data);
-	if (!param) /* if param is non-empty, it's supposed to be a filter */
-		goto out_reg;
-
-	if (!cmd_ops->set_filter)
-		goto out_reg;
+	ret = event_trigger_parse_num(param, trigger_data);
+	if (ret)
+		goto out_free;
-	ret = cmd_ops->set_filter(param, trigger_data, file);
+	ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
 	if (ret < 0)
 		goto out_free;
- out_reg:
 	/* Don't let event modules unload while probe registered */
 	ret = trace_event_try_get_ref(event_enable_file->event_call);
 	if (!ret) {
@@ -1901,32 +1804,23 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
 	ret = trace_event_enable_disable(event_enable_file, 1, 1);
 	if (ret < 0)
 		goto out_put;
-	ret = cmd_ops->reg(glob, trigger_data, file);
-	/*
-	 * The above returns on success the # of functions enabled,
-	 * but if it didn't find any functions it returns zero.
-	 * Consider no functions a failure too.
- */ - if (!ret) { - ret = -ENOENT; - goto out_disable; - } else if (ret < 0) + + ret = event_trigger_register(cmd_ops, file, glob, trigger_data); + if (ret) goto out_disable; - /* Just return zero, not the number of enabled functions */ - ret = 0; - event_trigger_free(trigger_ops, trigger_data); + + event_trigger_free(trigger_data); out: return ret; - out_disable: trace_event_enable_disable(event_enable_file, 0, 1); out_put: trace_event_put_ref(event_enable_file->event_call); out_free: - if (cmd_ops->set_filter) - cmd_ops->set_filter(NULL, trigger_data, NULL); - event_trigger_free(trigger_ops, trigger_data); + event_trigger_reset_filter(cmd_ops, trigger_data); + event_trigger_free(trigger_data); kfree(enable_data); + goto out; } @@ -1953,19 +1847,18 @@ int event_enable_register_trigger(char *glob, } if (data->ops->init) { - ret = data->ops->init(data->ops, data); + ret = data->ops->init(data); if (ret < 0) goto out; } list_add_rcu(&data->list, &file->triggers); - ret++; update_cond_flag(file); - if (trace_event_trigger_enable_disable(file, 1) < 0) { + ret = trace_event_trigger_enable_disable(file, 1); + if (ret < 0) { list_del_rcu(&data->list); update_cond_flag(file); - ret--; } out: return ret; @@ -1976,19 +1869,18 @@ void event_enable_unregister_trigger(char *glob, struct trace_event_file *file) { struct enable_trigger_data *test_enable_data = test->private_data; + struct event_trigger_data *data = NULL, *iter; struct enable_trigger_data *enable_data; - struct event_trigger_data *data; - bool unregistered = false; lockdep_assert_held(&event_mutex); - list_for_each_entry(data, &file->triggers, list) { - enable_data = data->private_data; + list_for_each_entry(iter, &file->triggers, list) { + enable_data = iter->private_data; if (enable_data && - (data->cmd_ops->trigger_type == + (iter->cmd_ops->trigger_type == test->cmd_ops->trigger_type) && (enable_data->file == test_enable_data->file)) { - unregistered = true; + data = iter; list_del_rcu(&data->list); trace_event_trigger_enable_disable(file, 0); update_cond_flag(file); @@ -1996,8 +1888,8 @@ void event_enable_unregister_trigger(char *glob, } } - if (unregistered && data->ops->free) - data->ops->free(data->ops, data); + if (data && data->ops->free) + data->ops->free(data); } static struct event_trigger_ops * diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 47cebef78532..93507330462c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1907,25 +1907,18 @@ core_initcall(init_kprobe_trace_early); static __init int init_kprobe_trace(void) { int ret; - struct dentry *entry; ret = tracing_init_dentry(); if (ret) return 0; - entry = tracefs_create_file("kprobe_events", TRACE_MODE_WRITE, - NULL, NULL, &kprobe_events_ops); - /* Event list interface */ - if (!entry) - pr_warn("Could not create tracefs 'kprobe_events' entry\n"); + trace_create_file("kprobe_events", TRACE_MODE_WRITE, + NULL, NULL, &kprobe_events_ops); /* Profile interface */ - entry = tracefs_create_file("kprobe_profile", TRACE_MODE_READ, - NULL, NULL, &kprobe_profile_ops); - - if (!entry) - pr_warn("Could not create tracefs 'kprobe_profile' entry\n"); + trace_create_file("kprobe_profile", TRACE_MODE_READ, + NULL, NULL, &kprobe_profile_ops); setup_boot_kprobe_events(); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index afb92e2f0aea..313439920a8c 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1578,11 +1578,27 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer 
*timer) trace_timerlat_sample(&s); - notify_new_max_latency(diff); + if (osnoise_data.stop_tracing) { + if (time_to_us(diff) >= osnoise_data.stop_tracing) { + + /* + * At this point, if stop_tracing is set and <= print_stack, + * print_stack is set and would be printed in the thread handler. + * + * Thus, print the stack trace as it is helpful to define the + * root cause of an IRQ latency. + */ + if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { + timerlat_save_stack(0); + timerlat_dump_stack(time_to_us(diff)); + } - if (osnoise_data.stop_tracing) - if (time_to_us(diff) >= osnoise_data.stop_tracing) osnoise_stop_tracing(); + notify_new_max_latency(diff); + + return HRTIMER_NORESTART; + } + } wake_up_process(tlat->kthread); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8aa493d25c73..67f47ea27921 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -692,7 +692,7 @@ static LIST_HEAD(ftrace_event_list); static int trace_search_list(struct list_head **list) { - struct trace_event *e; + struct trace_event *e = NULL, *iter; int next = __TRACE_LAST_TYPE; if (list_empty(&ftrace_event_list)) { @@ -704,9 +704,11 @@ static int trace_search_list(struct list_head **list) * We used up all possible max events, * lets see if somebody freed one. */ - list_for_each_entry(e, &ftrace_event_list, list) { - if (e->type != next) + list_for_each_entry(iter, &ftrace_event_list, list) { + if (iter->type != next) { + e = iter; break; + } next++; } @@ -714,7 +716,10 @@ static int trace_search_list(struct list_head **list) if (next > TRACE_EVENT_TYPE_MAX) return 0; - *list = &e->list; + if (e) + *list = &e->list; + else + *list = &ftrace_event_list; return next; } @@ -778,9 +783,8 @@ int register_trace_event(struct trace_event *event) list_add_tail(&event->list, list); - } else if (event->type > __TRACE_LAST_TYPE) { - printk(KERN_WARNING "Need to add type to trace.h\n"); - WARN_ON(1); + } else if (WARN(event->type > __TRACE_LAST_TYPE, + "Need to add type to trace.h")) { goto out; } else { /* Is this event already used */ @@ -1571,13 +1575,8 @@ __init static int init_events(void) for (i = 0; events[i]; i++) { event = events[i]; - ret = register_trace_event(event); - if (!ret) { - printk(KERN_WARNING "event %d failed to register\n", - event->type); - WARN_ON_ONCE(1); - } + WARN_ONCE(!ret, "event %d failed to register", event->type); } return 0; diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c index 4d4b78c8ca25..a520b11afb0d 100644 --- a/kernel/trace/trace_recursion_record.c +++ b/kernel/trace/trace_recursion_record.c @@ -224,12 +224,9 @@ static const struct file_operations recursed_functions_fops = { __init static int create_recursed_functions(void) { - struct dentry *dentry; - dentry = trace_create_file("recursed_functions", TRACE_MODE_WRITE, - NULL, NULL, &recursed_functions_fops); - if (!dentry) - pr_warn("WARNING: Failed to create recursed_functions\n"); + trace_create_file("recursed_functions", TRACE_MODE_WRITE, + NULL, NULL, &recursed_functions_fops); return 0; } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index abcadbe933bb..a2d301f58ced 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -895,6 +895,9 @@ trace_selftest_startup_function_graph(struct tracer *trace, ret = -1; goto out; } + + /* Enable tracing on all functions again */ + ftrace_set_global_filter(NULL, 0, 1); #endif /* Don't test dynamic tracing, the function tracer already did */ 
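The event trigger rework above funnels both parsers through the new helpers (event_trigger_check_remove(), event_trigger_separate_filter(), event_trigger_alloc(), event_trigger_parse_num(), event_trigger_set_filter(), event_trigger_register()). As a rough illustration of the "<cmd>[:count] [if filter]" split those helpers take over from the old open-coded strsep()/skip_spaces()/kstrtoul() logic — a minimal userspace sketch with a made-up function name, not the kernel helpers themselves:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Illustrative only: separate "cmd[:count] [if filter]" roughly the way the
 * old inline code in event_trigger_parse() did — strsep() on " \t" to peel
 * off the filter, then ':' for the optional count (default -1 = no limit).
 */
static void split_trigger(char *input, char **name, long *count, char **filter)
{
	char *param = strsep(&input, " \t");	/* "cmd" or "cmd:count" */

	*name = strsep(&param, ":");
	*count = param ? strtol(param, NULL, 0) : -1;

	*filter = NULL;
	if (input) {
		while (isspace((unsigned char)*input))	/* skip_spaces() */
			input++;
		if (*input)
			*filter = input;
	}
}

int main(void)
{
	char cmd[] = "traceoff:5 if pid == 42";
	char *name, *filter;
	long count;

	split_trigger(cmd, &name, &count, &filter);
	printf("name=%s count=%ld filter=%s\n",
	       name, count, filter ? filter : "(none)");
	return 0;
}

In the reworked kernel code the equivalent steps live in event_trigger_separate_filter() and event_trigger_parse_num(), shared by event_trigger_parse() and event_enable_trigger_parse().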
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f755bde42fd0..b69e207012c9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -154,7 +154,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags, goto end; /* parameter types */ - if (tr->trace_flags & TRACE_ITER_VERBOSE) + if (tr && tr->trace_flags & TRACE_ITER_VERBOSE) trace_seq_printf(s, "%s ", entry->types[i]); /* parameter values */ @@ -296,9 +296,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct trace_event_file *trace_file; struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; - struct ring_buffer_event *event; - struct trace_buffer *buffer; - unsigned int trace_ctx; + struct trace_event_buffer fbuffer; unsigned long args[6]; int syscall_nr; int size; @@ -321,20 +319,16 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - trace_ctx = tracing_gen_ctx(); - - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - sys_data->enter_event->event.type, size, trace_ctx); - if (!event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); + if (!entry) return; - entry = ring_buffer_event_data(event); + entry = ring_buffer_event_data(fbuffer.event); entry->nr = syscall_nr; syscall_get_arguments(current, regs, args); memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); - event_trigger_unlock_commit(trace_file, buffer, event, entry, - trace_ctx); + trace_event_buffer_commit(&fbuffer); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,9 +337,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct trace_event_file *trace_file; struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; - struct ring_buffer_event *event; - struct trace_buffer *buffer; - unsigned int trace_ctx; + struct trace_event_buffer fbuffer; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -364,20 +356,15 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - trace_ctx = tracing_gen_ctx(); - - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - sys_data->exit_event->event.type, sizeof(*entry), - trace_ctx); - if (!event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry)); + if (!entry) return; - entry = ring_buffer_event_data(event); + entry = ring_buffer_event_data(fbuffer.event); entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - event_trigger_unlock_commit(trace_file, buffer, event, entry, - trace_ctx); + trace_event_buffer_commit(&fbuffer); } static int reg_event_syscall_enter(struct trace_event_file *file, diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 9628b5571846..9901708ce6b8 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -1045,7 +1045,8 @@ static void sort_secondary(struct tracing_map *map, /** * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map * @map: The tracing_map - * @sort_key: The sort key to use for sorting + * @sort_keys: The sort key to use for sorting + * @n_sort_keys: hitcount, always have at least one * @sort_entries: outval: pointer to allocated and sorted array of entries * * tracing_map_sort_entries() sorts the current set of entries in the diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 
1d261fbe367b..4252f0645b9e 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -23,15 +23,20 @@ void bacct_add_tsk(struct user_namespace *user_ns, { const struct cred *tcred; u64 utime, stime, utimescaled, stimescaled; - u64 delta; + u64 now_ns, delta; time64_t btime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); /* calculate task elapsed time in nsec */ - delta = ktime_get_ns() - tsk->start_time; + now_ns = ktime_get_ns(); + /* store whole group time first */ + delta = now_ns - tsk->group_leader->start_time; /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); + stats->ac_tgetime = delta; + delta = now_ns - tsk->start_time; + do_div(delta, NSEC_PER_USEC); stats->ac_etime = delta; /* Convert to seconds for btime (note y2106 limit) */ btime = ktime_get_real_seconds() - div_u64(delta, USEC_PER_SEC); @@ -51,6 +56,7 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; stats->ac_pid = task_pid_nr_ns(tsk, pid_ns); + stats->ac_tgid = task_tgid_nr_ns(tsk, pid_ns); rcu_read_lock(); tcred = __task_cred(tsk); stats->ac_uid = from_kuid_munged(user_ns, tcred->uid); diff --git a/kernel/umh.c b/kernel/umh.c index 36c123360ab8..b989736e8707 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -132,7 +132,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) /* If SIGCLD is ignored do_wait won't populate the status. */ kernel_sigaction(SIGCHLD, SIG_DFL); - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); + pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); if (pid < 0) sub_info->retval = pid; else @@ -171,8 +171,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work) * want to pollute current->children, and we need a parent * that always ignores SIGCHLD to ensure auto-reaping. */ - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, - CLONE_PARENT | SIGCHLD); + pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, + CLONE_PARENT | SIGCHLD); if (pid < 0) { sub_info->retval = pid; umh_complete(sub_info); diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index 9dae1f648713..8303f4c7ca71 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -28,7 +28,7 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na file = file_open_root_mnt(mnt, name, O_CREAT | O_WRONLY, 0700); if (IS_ERR(file)) { - mntput(mnt); + kern_unmount(mnt); return ERR_CAST(file); } @@ -38,7 +38,7 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na if (err >= 0) err = -ENOMEM; filp_close(file, NULL); - mntput(mnt); + kern_unmount(mnt); return ERR_PTR(err); } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 40024e03d422..20a7a55e62b6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -57,7 +57,7 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace; * Should we panic when a soft-lockup or hard-lockup occurs: */ unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; + IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC); /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. 
In these
@@ -168,7 +168,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
 /* Global variables, exported for sysctl */
 unsigned int __read_mostly softlockup_panic =
-			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+			IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);
 static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
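The watchdog.c hunks above initialize hardlockup_panic and softlockup_panic from IS_ENABLED() on the bool config options rather than the separate CONFIG_BOOTPARAM_*_PANIC_VALUE constants; IS_ENABLED(CONFIG_FOO) evaluates to 1 when the option is set and 0 otherwise. A minimal, self-contained rendition of that preprocessor trick — simplified from <linux/kconfig.h> (the real macro also counts =m as enabled), with the CONFIG_ defines below standing in for the generated autoconf.h:

#include <stdio.h>

/* Simplified IS_ENABLED(): 1 if the macro is defined to 1, else 0. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x)              ___is_defined(x)
#define ___is_defined(val)           ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED(option)           __is_defined(option)

#define CONFIG_BOOTPARAM_HARDLOCKUP_PANIC 1
/* CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC deliberately left undefined */

static unsigned int hardlockup_panic = IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
static unsigned int softlockup_panic = IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);

int main(void)
{
	/* prints hardlockup_panic=1 softlockup_panic=0 */
	printf("hardlockup_panic=%u softlockup_panic=%u\n",
	       hardlockup_panic, softlockup_panic);
	return 0;
}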

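Two hunks above — trace_search_list() in trace_output.c and event_enable_unregister_trigger() in trace_events_trigger.c — apply the same hardening pattern: the list_for_each_entry() cursor becomes a throwaway iter, and a match is published only through a separate NULL-initialized pointer, so code after the loop never touches the iterator when nothing matched. A plain-C sketch of the idea, using ordinary pointers instead of list_head (names are illustrative):

#include <stdio.h>

struct node {
	int type;
	struct node *next;
};

/* Walk with a dedicated cursor ("iter"); export a hit only via "found",
 * which stays NULL when no node matches. */
static struct node *find_type(struct node *head, int type)
{
	struct node *found = NULL, *iter;

	for (iter = head; iter; iter = iter->next) {
		if (iter->type == type) {
			found = iter;
			break;
		}
	}
	return found;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *hit = find_type(&a, 2);

	printf("found type=%d\n", hit ? hit->type : -1);
	return 0;
}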