summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c14
-rw-r--r--kernel/crash_core.c3
-rw-r--r--kernel/events/core.c1
-rw-r--r--kernel/fork.c46
-rw-r--r--kernel/hung_task.c2
-rw-r--r--kernel/kcov.c14
-rw-r--r--kernel/kexec_core.c2
-rw-r--r--kernel/kexec_file.c38
-rw-r--r--kernel/kprobes.c144
-rw-r--r--kernel/livepatch/patch.c2
-rw-r--r--kernel/module/signing.c3
-rw-r--r--kernel/notifier.c101
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/hibernate.c2
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/suspend.c3
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/ptrace.c93
-rw-r--r--kernel/reboot.c348
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/signal.c140
-rw-r--r--kernel/taskstats.c24
-rw-r--r--kernel/time/posix-cpu-timers.c6
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/fgraph.c2
-rw-r--r--kernel/trace/ftrace.c210
-rw-r--r--kernel/trace/pid_list.c4
-rw-r--r--kernel/trace/ring_buffer.c81
-rw-r--r--kernel/trace/trace.c72
-rw-r--r--kernel/trace/trace.h26
-rw-r--r--kernel/trace/trace_boot.c2
-rw-r--r--kernel/trace/trace_dynevent.c9
-rw-r--r--kernel/trace/trace_eprobe.c24
-rw-r--r--kernel/trace/trace_events.c69
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/trace/trace_events_hist.c191
-rw-r--r--kernel/trace/trace_events_trigger.c324
-rw-r--r--kernel/trace/trace_kprobe.c15
-rw-r--r--kernel/trace/trace_osnoise.c22
-rw-r--r--kernel/trace/trace_output.c25
-rw-r--r--kernel/trace/trace_recursion_record.c7
-rw-r--r--kernel/trace/trace_selftest.c3
-rw-r--r--kernel/trace/trace_syscalls.c35
-rw-r--r--kernel/trace/tracing_map.c3
-rw-r--r--kernel/tsacct.c10
-rw-r--r--kernel/umh.c6
-rw-r--r--kernel/usermode_driver.c4
-rw-r--r--kernel/watchdog.c4
50 files changed, 1307 insertions, 851 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index cacd8684c3c4..5f6f3f829b36 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1953,6 +1953,11 @@ out:
CONT; \
LDX_MEM_##SIZEOP: \
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
+ CONT; \
+ LDX_PROBE_MEM_##SIZEOP: \
+ bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
+ DST = *((SIZE *)&DST); \
CONT;
LDST(B, u8)
@@ -1960,15 +1965,6 @@ out:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
-#define LDX_PROBE(SIZEOP, SIZE) \
- LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \
- CONT;
- LDX_PROBE(B, 1)
- LDX_PROBE(H, 2)
- LDX_PROBE(W, 4)
- LDX_PROBE(DW, 8)
-#undef LDX_PROBE
#define ATOMIC_ALU_OP(BOP, KOP) \
case BOP: \
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4d57c03714f4..71122e01623c 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -222,9 +222,6 @@ next:
p = strstr(p+1, name);
}
- if (!ck_cmdline)
- return NULL;
-
return ck_cmdline;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 950b25c3f210..80782cddb1da 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4257,7 +4257,6 @@ static void perf_event_remove_on_exec(int ctxn)
{
struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *event, *next;
- LIST_HEAD(free_list);
unsigned long flags;
bool modified = false;
diff --git a/kernel/fork.c b/kernel/fork.c
index 124829ed0163..9d44f2d46c69 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1982,7 +1982,7 @@ static __latent_entropy struct task_struct *copy_process(
struct task_struct *p;
struct multiprocess_signals delayed;
struct file *pidfile = NULL;
- u64 clone_flags = args->flags;
+ const u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;
/*
@@ -2071,6 +2071,9 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ p->flags &= ~PF_KTHREAD;
+ if (args->kthread)
+ p->flags |= PF_KTHREAD;
if (args->io_thread) {
/*
* Mark us an IO worker, and block any signal that isn't
@@ -2160,7 +2163,7 @@ static __latent_entropy struct task_struct *copy_process(
p->io_context = NULL;
audit_set_context(p, NULL);
cgroup_fork(p);
- if (p->flags & PF_KTHREAD) {
+ if (args->kthread) {
if (!set_kthread_struct(p))
goto bad_fork_cleanup_delayacct;
}
@@ -2243,7 +2246,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
- retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
+ retval = copy_thread(p, args);
if (retval)
goto bad_fork_cleanup_io;
@@ -2547,11 +2550,21 @@ static inline void init_idle_pids(struct task_struct *idle)
}
}
+static int idle_dummy(void *dummy)
+{
+ /* This function is never called */
+ return 0;
+}
+
struct task_struct * __init fork_idle(int cpu)
{
struct task_struct *task;
struct kernel_clone_args args = {
- .flags = CLONE_VM,
+ .flags = CLONE_VM,
+ .fn = &idle_dummy,
+ .fn_arg = NULL,
+ .kthread = 1,
+ .idle = 1,
};
task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
@@ -2582,8 +2595,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
- .stack = (unsigned long)fn,
- .stack_size = (unsigned long)arg,
+ .fn = fn,
+ .fn_arg = arg,
.io_thread = 1,
};
@@ -2687,8 +2700,25 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
- .stack = (unsigned long)fn,
- .stack_size = (unsigned long)arg,
+ .fn = fn,
+ .fn_arg = arg,
+ .kthread = 1,
+ };
+
+ return kernel_clone(&args);
+}
+
+/*
+ * Create a user mode thread.
+ */
+pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
+ .fn = fn,
+ .fn_arg = arg,
};
return kernel_clone(&args);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 02a65d554340..80bfea5dd5c4 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -73,7 +73,7 @@ static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
- CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+ IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/kernel/kcov.c b/kernel/kcov.c
index b3732b210593..e19c84b02452 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -204,8 +204,16 @@ void notrace __sanitizer_cov_trace_pc(void)
/* The first 64-bit word is the number of subsequent PCs. */
pos = READ_ONCE(area[0]) + 1;
if (likely(pos < t->kcov_size)) {
- area[pos] = ip;
+ /* Previously we wrote pc before updating pos. However, some
+ * early interrupt code could bypass check_kcov_mode() check
+ * and invoke __sanitizer_cov_trace_pc(). If such interrupt is
+ * raised between writing pc and updating pos, the pc could be
+ * overwritten by the recursive __sanitizer_cov_trace_pc().
+ * Update pos before writing pc to avoid such interleaving.
+ */
WRITE_ONCE(area[0], pos);
+ barrier();
+ area[pos] = ip;
}
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
@@ -236,11 +244,13 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
start_index = 1 + count * KCOV_WORDS_PER_CMP;
end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64);
if (likely(end_pos <= max_pos)) {
+ /* See comment in __sanitizer_cov_trace_pc(). */
+ WRITE_ONCE(area[0], count + 1);
+ barrier();
area[start_index] = type;
area[start_index + 1] = arg1;
area[start_index + 2] = arg2;
area[start_index + 3] = ip;
- WRITE_ONCE(area[0], count + 1);
}
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8b3e9a2014cf..4d34c78334ce 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -768,7 +768,6 @@ static struct page *kimage_alloc_page(struct kimage *image,
kimage_free_pages(old_page);
continue;
}
- addr = old_addr;
page = old_page;
break;
}
@@ -788,7 +787,6 @@ static int kimage_load_normal_segment(struct kimage *image,
unsigned char __user *buf = NULL;
unsigned char *kbuf = NULL;
- result = 0;
if (image->file_mode)
kbuf = segment->kbuf;
else
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 8347fc158d2b..145321a5e798 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -109,40 +109,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
#endif
/*
- * arch_kexec_apply_relocations_add - apply relocations of type RELA
- * @pi: Purgatory to be relocated.
- * @section: Section relocations applying to.
- * @relsec: Section containing RELAs.
- * @symtab: Corresponding symtab.
- *
- * Return: 0 on success, negative errno on error.
- */
-int __weak
-arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
-{
- pr_err("RELA relocation unsupported.\n");
- return -ENOEXEC;
-}
-
-/*
- * arch_kexec_apply_relocations - apply relocations of type REL
- * @pi: Purgatory to be relocated.
- * @section: Section relocations applying to.
- * @relsec: Section containing RELs.
- * @symtab: Corresponding symtab.
- *
- * Return: 0 on success, negative errno on error.
- */
-int __weak
-arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
-{
- pr_err("REL relocation unsupported.\n");
- return -ENOEXEC;
-}
-
-/*
* Free up memory used by kernel, initrd, and command line. This is temporary
* memory allocation which is not needed any more after these buffers have
* been loaded into separate segments and have been copied elsewhere.
@@ -1260,7 +1226,7 @@ int crash_exclude_mem_range(struct crash_mem *mem,
return 0;
}
-int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
void **addr, unsigned long *sz)
{
Elf64_Ehdr *ehdr;
@@ -1324,7 +1290,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
phdr++;
/* Prepare PT_LOAD type program header for kernel text region */
- if (kernel_map) {
+ if (need_kernel_map) {
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_vaddr = (unsigned long) _text;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index dd58c0be9ce2..f214f8c088ed 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1257,79 +1257,6 @@ void kprobe_busy_end(void)
preempt_enable();
}
-#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
-static void free_rp_inst_rcu(struct rcu_head *head)
-{
- struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
-
- if (refcount_dec_and_test(&ri->rph->ref))
- kfree(ri->rph);
- kfree(ri);
-}
-NOKPROBE_SYMBOL(free_rp_inst_rcu);
-
-static void recycle_rp_inst(struct kretprobe_instance *ri)
-{
- struct kretprobe *rp = get_kretprobe(ri);
-
- if (likely(rp))
- freelist_add(&ri->freelist, &rp->freelist);
- else
- call_rcu(&ri->rcu, free_rp_inst_rcu);
-}
-NOKPROBE_SYMBOL(recycle_rp_inst);
-
-/*
- * This function is called from delayed_put_task_struct() when a task is
- * dead and cleaned up to recycle any kretprobe instances associated with
- * this task. These left over instances represent probed functions that
- * have been called but will never return.
- */
-void kprobe_flush_task(struct task_struct *tk)
-{
- struct kretprobe_instance *ri;
- struct llist_node *node;
-
- /* Early boot, not yet initialized. */
- if (unlikely(!kprobes_initialized))
- return;
-
- kprobe_busy_begin();
-
- node = __llist_del_all(&tk->kretprobe_instances);
- while (node) {
- ri = container_of(node, struct kretprobe_instance, llist);
- node = node->next;
-
- recycle_rp_inst(ri);
- }
-
- kprobe_busy_end();
-}
-NOKPROBE_SYMBOL(kprobe_flush_task);
-
-static inline void free_rp_inst(struct kretprobe *rp)
-{
- struct kretprobe_instance *ri;
- struct freelist_node *node;
- int count = 0;
-
- node = rp->freelist.head;
- while (node) {
- ri = container_of(node, struct kretprobe_instance, freelist);
- node = node->next;
-
- kfree(ri);
- count++;
- }
-
- if (refcount_sub_and_test(count, &rp->rph->ref)) {
- kfree(rp->rph);
- rp->rph = NULL;
- }
-}
-#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
-
/* Add the new probe to 'ap->list'. */
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
{
@@ -1928,6 +1855,77 @@ static struct notifier_block kprobe_exceptions_nb = {
#ifdef CONFIG_KRETPROBES
#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
+static void free_rp_inst_rcu(struct rcu_head *head)
+{
+ struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
+
+ if (refcount_dec_and_test(&ri->rph->ref))
+ kfree(ri->rph);
+ kfree(ri);
+}
+NOKPROBE_SYMBOL(free_rp_inst_rcu);
+
+static void recycle_rp_inst(struct kretprobe_instance *ri)
+{
+ struct kretprobe *rp = get_kretprobe(ri);
+
+ if (likely(rp))
+ freelist_add(&ri->freelist, &rp->freelist);
+ else
+ call_rcu(&ri->rcu, free_rp_inst_rcu);
+}
+NOKPROBE_SYMBOL(recycle_rp_inst);
+
+/*
+ * This function is called from delayed_put_task_struct() when a task is
+ * dead and cleaned up to recycle any kretprobe instances associated with
+ * this task. These left over instances represent probed functions that
+ * have been called but will never return.
+ */
+void kprobe_flush_task(struct task_struct *tk)
+{
+ struct kretprobe_instance *ri;
+ struct llist_node *node;
+
+ /* Early boot, not yet initialized. */
+ if (unlikely(!kprobes_initialized))
+ return;
+
+ kprobe_busy_begin();
+
+ node = __llist_del_all(&tk->kretprobe_instances);
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
+ node = node->next;
+
+ recycle_rp_inst(ri);
+ }
+
+ kprobe_busy_end();
+}
+NOKPROBE_SYMBOL(kprobe_flush_task);
+
+static inline void free_rp_inst(struct kretprobe *rp)
+{
+ struct kretprobe_instance *ri;
+ struct freelist_node *node;
+ int count = 0;
+
+ node = rp->freelist.head;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, freelist);
+ node = node->next;
+
+ kfree(ri);
+ count++;
+ }
+
+ if (refcount_sub_and_test(count, &rp->rph->ref)) {
+ kfree(rp->rph);
+ rp->rph = NULL;
+ }
+}
+
/* This assumes the 'tsk' is the current task or the is not running. */
static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
struct llist_node **cur)
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index c172bf92b576..4c4f5a776d80 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -118,7 +118,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
if (func->nop)
goto unlock;
- klp_arch_set_pc(fregs, (unsigned long)func->new_func);
+ ftrace_instruction_pointer_set(fregs, (unsigned long)func->new_func);
unlock:
ftrace_test_recursion_unlock(bit);
diff --git a/kernel/module/signing.c b/kernel/module/signing.c
index 85c8999dfecf..a2ff4242e623 100644
--- a/kernel/module/signing.c
+++ b/kernel/module/signing.c
@@ -16,6 +16,9 @@
#include <uapi/linux/module.h>
#include "internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "module."
+
static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
module_param(sig_enforce, bool_enable_only, 0644);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index ba005ebf4730..0d5bd62c480e 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -20,7 +20,8 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
*/
static int notifier_chain_register(struct notifier_block **nl,
- struct notifier_block *n)
+ struct notifier_block *n,
+ bool unique_priority)
{
while ((*nl) != NULL) {
if (unlikely((*nl) == n)) {
@@ -30,6 +31,8 @@ static int notifier_chain_register(struct notifier_block **nl,
}
if (n->priority > (*nl)->priority)
break;
+ if (n->priority == (*nl)->priority && unique_priority)
+ return -EBUSY;
nl = &((*nl)->next);
}
n->next = *nl;
@@ -144,13 +147,36 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
int ret;
spin_lock_irqsave(&nh->lock, flags);
- ret = notifier_chain_register(&nh->head, n);
+ ret = notifier_chain_register(&nh->head, n, false);
spin_unlock_irqrestore(&nh->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
/**
+ * atomic_notifier_chain_register_unique_prio - Add notifier to an atomic notifier chain
+ * @nh: Pointer to head of the atomic notifier chain
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to an atomic notifier chain if there is no other
+ * notifier registered using the same priority.
+ *
+ * Returns 0 on success, %-EEXIST or %-EBUSY on error.
+ */
+int atomic_notifier_chain_register_unique_prio(struct atomic_notifier_head *nh,
+ struct notifier_block *n)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&nh->lock, flags);
+ ret = notifier_chain_register(&nh->head, n, true);
+ spin_unlock_irqrestore(&nh->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register_unique_prio);
+
+/**
* atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
* @n: Entry to remove from notifier chain
@@ -204,23 +230,27 @@ int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
NOKPROBE_SYMBOL(atomic_notifier_call_chain);
+/**
+ * atomic_notifier_call_chain_is_empty - Check whether notifier chain is empty
+ * @nh: Pointer to head of the atomic notifier chain
+ *
+ * Checks whether notifier chain is empty.
+ *
+ * Returns true if notifier chain is empty, false otherwise.
+ */
+bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh)
+{
+ return !rcu_access_pointer(nh->head);
+}
+
/*
* Blocking notifier chain routines. All access to the chain is
* synchronized by an rwsem.
*/
-/**
- * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
- * @nh: Pointer to head of the blocking notifier chain
- * @n: New entry in notifier chain
- *
- * Adds a notifier to a blocking notifier chain.
- * Must be called in process context.
- *
- * Returns 0 on success, %-EEXIST on error.
- */
-int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
- struct notifier_block *n)
+static int __blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+ struct notifier_block *n,
+ bool unique_priority)
{
int ret;
@@ -230,16 +260,49 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
* such times we must not call down_write().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
- return notifier_chain_register(&nh->head, n);
+ return notifier_chain_register(&nh->head, n, unique_priority);
down_write(&nh->rwsem);
- ret = notifier_chain_register(&nh->head, n);
+ ret = notifier_chain_register(&nh->head, n, unique_priority);
up_write(&nh->rwsem);
return ret;
}
+
+/**
+ * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ * @nh: Pointer to head of the blocking notifier chain
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to a blocking notifier chain.
+ * Must be called in process context.
+ *
+ * Returns 0 on success, %-EEXIST on error.
+ */
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+ struct notifier_block *n)
+{
+ return __blocking_notifier_chain_register(nh, n, false);
+}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
/**
+ * blocking_notifier_chain_register_unique_prio - Add notifier to a blocking notifier chain
+ * @nh: Pointer to head of the blocking notifier chain
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to a blocking notifier chain if there is no other
+ * notifier registered using the same priority.
+ *
+ * Returns 0 on success, %-EEXIST or %-EBUSY on error.
+ */
+int blocking_notifier_chain_register_unique_prio(struct blocking_notifier_head *nh,
+ struct notifier_block *n)
+{
+ return __blocking_notifier_chain_register(nh, n, true);
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register_unique_prio);
+
+/**
* blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: Entry to remove from notifier chain
@@ -341,7 +404,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
int raw_notifier_chain_register(struct raw_notifier_head *nh,
struct notifier_block *n)
{
- return notifier_chain_register(&nh->head, n);
+ return notifier_chain_register(&nh->head, n, false);
}
EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
@@ -420,10 +483,10 @@ int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
* such times we must not call mutex_lock().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
- return notifier_chain_register(&nh->head, n);
+ return notifier_chain_register(&nh->head, n, false);
mutex_lock(&nh->mutex);
- ret = notifier_chain_register(&nh->head, n);
+ ret = notifier_chain_register(&nh->head, n, false);
mutex_unlock(&nh->mutex);
return ret;
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a46a3723bc66..f4f8cb0435b4 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -52,7 +52,7 @@ static struct kmem_cache *create_pid_cachep(unsigned int level)
/* Name collision forces to do allocation under mutex. */
if (!*pkc)
*pkc = kmem_cache_create(name, len, 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
mutex_unlock(&pid_caches_mutex);
/* current can fail, but someone else can succeed. */
return READ_ONCE(*pkc);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 938d5c78b421..20a66bf9f465 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -83,7 +83,7 @@ bool hibernation_available(void)
{
return nohibernate == 0 &&
!security_locked_down(LOCKDOWN_HIBERNATION) &&
- !secretmem_active();
+ !secretmem_active() && !cxl_mem_active();
}
/**
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 5242bf2ee469..e3694034b753 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -127,7 +127,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr,
char *s = buf;
suspend_state_t i;
- for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+ for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) {
+ if (i >= PM_SUSPEND_MEM && cxl_mem_active())
+ continue;
if (mem_sleep_states[i]) {
const char *label = mem_sleep_states[i];
@@ -136,6 +138,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr,
else
s += sprintf(s, "%s ", label);
}
+ }
/* Convert the last space to a newline if needed. */
if (s != buf)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6fcdee7e87a5..827075944d28 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -236,7 +236,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem);
static bool sleep_state_supported(suspend_state_t state)
{
- return state == PM_SUSPEND_TO_IDLE || valid_state(state);
+ return state == PM_SUSPEND_TO_IDLE ||
+ (valid_state(state) && !cxl_mem_active());
}
static int platform_suspend_prepare(suspend_state_t state)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a3e1035929b0..ea3dd55709e7 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3904,7 +3904,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
}
if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible_all(&log_wait);
+ wake_up_interruptible(&log_wait);
}
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ccc4b465775b..156a99283b11 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(struct task_struct *task)
return true;
}
-/* Ensure that nothing can wake it up, even SIGKILL */
+/*
+ * Ensure that nothing can wake it up, even SIGKILL
+ *
+ * A task is switched to this state while a ptrace operation is in progress;
+ * such that the ptrace operation is uninterruptible.
+ */
static bool ptrace_freeze_traced(struct task_struct *task)
{
bool ret = false;
@@ -197,7 +202,7 @@ static bool ptrace_freeze_traced(struct task_struct *task)
spin_lock_irq(&task->sighand->siglock);
if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
!__fatal_signal_pending(task)) {
- WRITE_ONCE(task->__state, __TASK_TRACED);
+ task->jobctl |= JOBCTL_PTRACE_FROZEN;
ret = true;
}
spin_unlock_irq(&task->sighand->siglock);
@@ -207,23 +212,21 @@ static bool ptrace_freeze_traced(struct task_struct *task)
static void ptrace_unfreeze_traced(struct task_struct *task)
{
- if (READ_ONCE(task->__state) != __TASK_TRACED)
- return;
-
- WARN_ON(!task->ptrace || task->parent != current);
+ unsigned long flags;
/*
- * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
- * Recheck state under the lock to close this race.
+ * The child may be awake and may have cleared
+ * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will
+ * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew.
*/
- spin_lock_irq(&task->sighand->siglock);
- if (READ_ONCE(task->__state) == __TASK_TRACED) {
- if (__fatal_signal_pending(task))
+ if (lock_task_sighand(task, &flags)) {
+ task->jobctl &= ~JOBCTL_PTRACE_FROZEN;
+ if (__fatal_signal_pending(task)) {
+ task->jobctl &= ~JOBCTL_TRACED;
wake_up_state(task, __TASK_TRACED);
- else
- WRITE_ONCE(task->__state, TASK_TRACED);
+ }
+ unlock_task_sighand(task, &flags);
}
- spin_unlock_irq(&task->sighand->siglock);
}
/**
@@ -256,7 +259,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
*/
read_lock(&tasklist_lock);
if (child->ptrace && child->parent == current) {
- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
@@ -266,17 +268,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
}
read_unlock(&tasklist_lock);
- if (!ret && !ignore_state) {
- if (!wait_task_inactive(child, __TASK_TRACED)) {
- /*
- * This can only happen if may_ptrace_stop() fails and
- * ptrace_stop() changes ->state back to TASK_RUNNING,
- * so we should not worry about leaking __TASK_TRACED.
- */
- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
- ret = -ESRCH;
- }
- }
+ if (!ret && !ignore_state &&
+ WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED)))
+ ret = -ESRCH;
return ret;
}
@@ -475,8 +469,10 @@ static int ptrace_attach(struct task_struct *task, long request,
* in and out of STOPPED are protected by siglock.
*/
if (task_is_stopped(task) &&
- task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
+ task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
+ task->jobctl &= ~JOBCTL_STOPPED;
signal_wake_up_state(task, __TASK_STOPPED);
+ }
spin_unlock(&task->sighand->siglock);
@@ -829,11 +825,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
}
#endif
-#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
-#else
-#define is_singlestep(request) 0
-#endif
#ifdef PTRACE_SINGLEBLOCK
#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
@@ -850,8 +842,6 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
static int ptrace_resume(struct task_struct *child, long request,
unsigned long data)
{
- bool need_siglock;
-
if (!valid_signal(data))
return -EIO;
@@ -887,18 +877,12 @@ static int ptrace_resume(struct task_struct *child, long request,
* Note that we need siglock even if ->exit_code == data and/or this
* status was not reported yet, the new status must not be cleared by
* wait_task_stopped() after resume.
- *
- * If data == 0 we do not care if wait_task_stopped() reports the old
- * status and clears the code too; this can't race with the tracee, it
- * takes siglock after resume.
*/
- need_siglock = data && !thread_group_empty(current);
- if (need_siglock)
- spin_lock_irq(&child->sighand->siglock);
+ spin_lock_irq(&child->sighand->siglock);
child->exit_code = data;
+ child->jobctl &= ~JOBCTL_TRACED;
wake_up_state(child, __TASK_TRACED);
- if (need_siglock)
- spin_unlock_irq(&child->sighand->siglock);
+ spin_unlock_irq(&child->sighand->siglock);
return 0;
}
@@ -1221,9 +1205,7 @@ int ptrace_request(struct task_struct *child, long request,
}
#endif
-#ifdef PTRACE_SINGLESTEP
case PTRACE_SINGLESTEP:
-#endif
#ifdef PTRACE_SINGLEBLOCK
case PTRACE_SINGLEBLOCK:
#endif
@@ -1236,9 +1218,8 @@ int ptrace_request(struct task_struct *child, long request,
return ptrace_resume(child, request, data);
case PTRACE_KILL:
- if (child->exit_state) /* already dead */
- return 0;
- return ptrace_resume(child, request, SIGKILL);
+ send_sig_info(SIGKILL, SEND_SIG_NOINFO, child);
+ return 0;
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET:
@@ -1285,10 +1266,6 @@ int ptrace_request(struct task_struct *child, long request,
return ret;
}
-#ifndef arch_ptrace_attach
-#define arch_ptrace_attach(child) do { } while (0)
-#endif
-
SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
unsigned long, data)
{
@@ -1297,8 +1274,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
if (request == PTRACE_TRACEME) {
ret = ptrace_traceme();
- if (!ret)
- arch_ptrace_attach(current);
goto out;
}
@@ -1310,12 +1285,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
- /*
- * Some architectures need to do book-keeping after
- * a ptrace attach.
- */
- if (!ret)
- arch_ptrace_attach(child);
goto out_put_task_struct;
}
@@ -1455,12 +1424,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
- /*
- * Some architectures need to do book-keeping after
- * a ptrace attach.
- */
- if (!ret)
- arch_ptrace_attach(child);
goto out_put_task_struct;
}
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 44228a93742b..a091145ee710 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -48,12 +48,20 @@ int reboot_cpu;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
+struct sys_off_handler {
+ struct notifier_block nb;
+ int (*sys_off_cb)(struct sys_off_data *data);
+ void *cb_data;
+ enum sys_off_mode mode;
+ bool blocking;
+ void *list;
+};
+
/*
- * If set, this is used for preparing the system to power off.
+ * Temporary stub that prevents linkage failure while we're in the process
+ * of removing all uses of legacy pm_power_off() around the kernel.
*/
-
-void (*pm_power_off_prepare)(void);
-EXPORT_SYMBOL_GPL(pm_power_off_prepare);
+void __weak (*pm_power_off)(void);
/**
* emergency_restart - reboot the system
@@ -281,6 +289,316 @@ void kernel_halt(void)
}
EXPORT_SYMBOL_GPL(kernel_halt);
+/*
+ * Notifier list for kernel code which wants to be called
+ * to prepare system for power off.
+ */
+static BLOCKING_NOTIFIER_HEAD(power_off_prep_handler_list);
+
+/*
+ * Notifier list for kernel code which wants to be called
+ * to power off system.
+ */
+static ATOMIC_NOTIFIER_HEAD(power_off_handler_list);
+
+static int sys_off_notify(struct notifier_block *nb,
+ unsigned long mode, void *cmd)
+{
+ struct sys_off_handler *handler;
+ struct sys_off_data data = {};
+
+ handler = container_of(nb, struct sys_off_handler, nb);
+ data.cb_data = handler->cb_data;
+ data.mode = mode;
+ data.cmd = cmd;
+
+ return handler->sys_off_cb(&data);
+}
+
+/**
+ * register_sys_off_handler - Register sys-off handler
+ * @mode: Sys-off mode
+ * @priority: Handler priority
+ * @callback: Callback function
+ * @cb_data: Callback argument
+ *
+ * Registers system power-off or restart handler that will be invoked
+ * at the step corresponding to the given sys-off mode. Handler's callback
+ * should return NOTIFY_DONE to permit execution of the next handler in
+ * the call chain or NOTIFY_STOP to break the chain (in error case for
+ * example).
+ *
+ * Multiple handlers can be registered at the default priority level.
+ *
+ * Only one handler can be registered at the non-default priority level,
+ * otherwise ERR_PTR(-EBUSY) is returned.
+ *
+ * Returns a new instance of struct sys_off_handler on success, or
+ * an ERR_PTR()-encoded error code otherwise.
+ */
+struct sys_off_handler *
+register_sys_off_handler(enum sys_off_mode mode,
+ int priority,
+ int (*callback)(struct sys_off_data *data),
+ void *cb_data)
+{
+ struct sys_off_handler *handler;
+ int err;
+
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler)
+ return ERR_PTR(-ENOMEM);
+
+ switch (mode) {
+ case SYS_OFF_MODE_POWER_OFF_PREPARE:
+ handler->list = &power_off_prep_handler_list;
+ handler->blocking = true;
+ break;
+
+ case SYS_OFF_MODE_POWER_OFF:
+ handler->list = &power_off_handler_list;
+ break;
+
+ case SYS_OFF_MODE_RESTART:
+ handler->list = &restart_handler_list;
+ break;
+
+ default:
+ kfree(handler);
+ return ERR_PTR(-EINVAL);
+ }
+
+ handler->nb.notifier_call = sys_off_notify;
+ handler->nb.priority = priority;
+ handler->sys_off_cb = callback;
+ handler->cb_data = cb_data;
+ handler->mode = mode;
+
+ if (handler->blocking) {
+ if (priority == SYS_OFF_PRIO_DEFAULT)
+ err = blocking_notifier_chain_register(handler->list,
+ &handler->nb);
+ else
+ err = blocking_notifier_chain_register_unique_prio(handler->list,
+ &handler->nb);
+ } else {
+ if (priority == SYS_OFF_PRIO_DEFAULT)
+ err = atomic_notifier_chain_register(handler->list,
+ &handler->nb);
+ else
+ err = atomic_notifier_chain_register_unique_prio(handler->list,
+ &handler->nb);
+ }
+
+ if (err) {
+ kfree(handler);
+ return ERR_PTR(err);
+ }
+
+ return handler;
+}
+EXPORT_SYMBOL_GPL(register_sys_off_handler);
+
+/**
+ * unregister_sys_off_handler - Unregister sys-off handler
+ * @handler: Sys-off handler returned by register_sys_off_handler()
+ *
+ * Removes the handler's notifier block from the chain it was registered
+ * on (blocking or atomic, as recorded in the handler) and frees the
+ * handler.
+ *
+ * Passing NULL or an ERR_PTR()-encoded value is a no-op, so the result of
+ * a failed register_sys_off_handler() call can be handed back here
+ * without an extra check.
+ */
+void unregister_sys_off_handler(struct sys_off_handler *handler)
+{
+	int err;
+
+	/*
+	 * register_sys_off_handler() reports failure with ERR_PTR(), which
+	 * is non-NULL; it must not be dereferenced below.
+	 */
+	if (IS_ERR_OR_NULL(handler))
+		return;
+
+	if (handler->blocking)
+		err = blocking_notifier_chain_unregister(handler->list,
+							 &handler->nb);
+	else
+		err = atomic_notifier_chain_unregister(handler->list,
+						       &handler->nb);
+
+	/* sanity check, shall never happen */
+	WARN_ON(err);
+
+	kfree(handler);
+}
+EXPORT_SYMBOL_GPL(unregister_sys_off_handler);
+
+static void devm_unregister_sys_off_handler(void *data)
+{
+ struct sys_off_handler *handler = data;
+
+ unregister_sys_off_handler(handler);
+}
+
+/**
+ * devm_register_sys_off_handler - Register sys-off handler
+ * @dev: Device that registers handler
+ * @mode: Sys-off mode
+ * @priority: Handler priority
+ * @callback: Callback function
+ * @cb_data: Callback argument
+ *
+ * Registers resource-managed sys-off handler.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int devm_register_sys_off_handler(struct device *dev,
+ enum sys_off_mode mode,
+ int priority,
+ int (*callback)(struct sys_off_data *data),
+ void *cb_data)
+{
+ struct sys_off_handler *handler;
+
+ handler = register_sys_off_handler(mode, priority, callback, cb_data);
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
+
+ return devm_add_action_or_reset(dev, devm_unregister_sys_off_handler,
+ handler);
+}
+EXPORT_SYMBOL_GPL(devm_register_sys_off_handler);
+
+/**
+ * devm_register_power_off_handler - Register power-off handler
+ * @dev: Device that registers callback
+ * @callback: Callback function
+ * @cb_data: Callback's argument
+ *
+ * Registers resource-managed sys-off handler with a default priority
+ * and using power-off mode.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int devm_register_power_off_handler(struct device *dev,
+ int (*callback)(struct sys_off_data *data),
+ void *cb_data)
+{
+ return devm_register_sys_off_handler(dev,
+ SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ callback, cb_data);
+}
+EXPORT_SYMBOL_GPL(devm_register_power_off_handler);
+
+/**
+ * devm_register_restart_handler - Register restart handler
+ * @dev: Device that registers callback
+ * @callback: Callback function
+ * @cb_data: Callback's argument
+ *
+ * Registers resource-managed sys-off handler with a default priority
+ * and using restart mode.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int devm_register_restart_handler(struct device *dev,
+ int (*callback)(struct sys_off_data *data),
+ void *cb_data)
+{
+ return devm_register_sys_off_handler(dev,
+ SYS_OFF_MODE_RESTART,
+ SYS_OFF_PRIO_DEFAULT,
+ callback, cb_data);
+}
+EXPORT_SYMBOL_GPL(devm_register_restart_handler);
+
+static struct sys_off_handler *platform_power_off_handler;
+
+static int platform_power_off_notify(struct sys_off_data *data)
+{
+ void (*platform_power_power_off_cb)(void) = data->cb_data;
+
+ platform_power_power_off_cb();
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * register_platform_power_off - Register platform-level power-off callback
+ * @power_off: Power-off callback
+ *
+ * Registers power-off callback that will be called as the last step
+ * of the power-off sequence. This callback is expected to be invoked
+ * as a last resort. Only one platform power-off callback is allowed
+ * to be registered at a time.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int register_platform_power_off(void (*power_off)(void))
+{
+ struct sys_off_handler *handler;
+
+ handler = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_PLATFORM,
+ platform_power_off_notify,
+ power_off);
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
+
+ platform_power_off_handler = handler;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_platform_power_off);
+
+/**
+ * unregister_platform_power_off - Unregister platform-level power-off callback
+ * @power_off: Power-off callback
+ *
+ * Unregisters previously registered platform power-off callback.
+ */
+void unregister_platform_power_off(void (*power_off)(void))
+{
+ if (platform_power_off_handler &&
+ platform_power_off_handler->cb_data == power_off) {
+ unregister_sys_off_handler(platform_power_off_handler);
+ platform_power_off_handler = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(unregister_platform_power_off);
+
+static int legacy_pm_power_off(struct sys_off_data *data)
+{
+ if (pm_power_off)
+ pm_power_off();
+
+ return NOTIFY_DONE;
+}
+
+static void do_kernel_power_off_prepare(void)
+{
+ blocking_notifier_call_chain(&power_off_prep_handler_list, 0, NULL);
+}
+
+/**
+ * do_kernel_power_off - Execute kernel power-off handler call chain
+ *
+ * Expected to be called as last step of the power-off sequence.
+ *
+ * Powers off the system immediately if a power-off handler function has
+ * been registered. Otherwise does nothing.
+ */
+void do_kernel_power_off(void)
+{
+ atomic_notifier_call_chain(&power_off_handler_list, 0, NULL);
+}
+
+/**
+ * kernel_can_power_off - check whether system can be powered off
+ *
+ * Returns true if power-off handler is registered and system can be
+ * powered off, false otherwise.
+ */
+bool kernel_can_power_off(void)
+{
+ return !atomic_notifier_call_chain_is_empty(&power_off_handler_list);
+}
+EXPORT_SYMBOL_GPL(kernel_can_power_off);
+
/**
* kernel_power_off - power_off the system
*
@@ -289,8 +607,7 @@ EXPORT_SYMBOL_GPL(kernel_halt);
void kernel_power_off(void)
{
kernel_shutdown_prepare(SYSTEM_POWER_OFF);
- if (pm_power_off_prepare)
- pm_power_off_prepare();
+ do_kernel_power_off_prepare();
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("Power down\n");
@@ -313,6 +630,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
void __user *, arg)
{
struct pid_namespace *pid_ns = task_active_pid_ns(current);
+ struct sys_off_handler *sys_off = NULL;
char buffer[256];
int ret = 0;
@@ -337,10 +655,25 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
if (ret)
return ret;
+ /*
+ * Register sys-off handlers for legacy PM callback. This allows
+ * legacy PM callbacks to temporarily co-exist with the new sys-off API.
+ *
+ * TODO: Remove legacy handlers once all legacy PM users have been
+ * switched to the sys-off based APIs.
+ */
+ if (pm_power_off) {
+ sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ legacy_pm_power_off, NULL);
+ if (IS_ERR(sys_off))
+ return PTR_ERR(sys_off);
+ }
+
/* Instead of trying to make the power_off code look like
* halt when pm_power_off is not set do it the easy way.
*/
- if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
+ if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !kernel_can_power_off())
cmd = LINUX_REBOOT_CMD_HALT;
mutex_lock(&system_transition_mutex);
@@ -394,6 +727,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
break;
}
mutex_unlock(&system_transition_mutex);
+ unregister_sys_off_handler(sys_off);
return ret;
}
diff --git a/kernel/relay.c b/kernel/relay.c
index d1a67fbb819d..6a611e779e95 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -440,7 +440,7 @@ int relay_prepare_cpu(unsigned int cpu)
mutex_lock(&relay_channels_mutex);
list_for_each_entry(chan, &relay_channels, list) {
- if ((buf = *per_cpu_ptr(chan->buf, cpu)))
+ if (*per_cpu_ptr(chan->buf, cpu))
continue;
buf = relay_open_buf(chan, cpu);
if (!buf) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 696c6490bd5b..bfa7452ca92e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6353,10 +6353,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
/*
* We must load prev->state once (task_struct::state is volatile), such
- * that:
- *
- * - we form a control dependency vs deactivate_task() below.
- * - ptrace_{,un}freeze_traced() can change ->state underneath us.
+ * that we form a control dependency vs deactivate_task() below.
*/
prev_state = READ_ONCE(prev->__state);
if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c5b74f66bd3..77b2048a9326 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2927,7 +2927,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
/*
* We don't care about NUMA placement if we don't have memory.
*/
- if ((curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work)
+ if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work)
return;
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index e43bc2a692f5..edb1dc9b00dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -762,7 +762,10 @@ still_pending:
*/
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
+ lockdep_assert_held(&t->sighand->siglock);
+
set_tsk_thread_flag(t, TIF_SIGPENDING);
+
/*
* TASK_WAKEKILL also means wake it up in the stopped/traced/killable
* case. We don't check t->state here because there is a race with it
@@ -884,7 +887,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info,
static void ptrace_trap_notify(struct task_struct *t)
{
WARN_ON_ONCE(!(t->ptrace & PT_SEIZED));
- assert_spin_locked(&t->sighand->siglock);
+ lockdep_assert_held(&t->sighand->siglock);
task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
@@ -930,9 +933,10 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
for_each_thread(p, t) {
flush_sigqueue_mask(&flush, &t->pending);
task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
- if (likely(!(t->ptrace & PT_SEIZED)))
+ if (likely(!(t->ptrace & PT_SEIZED))) {
+ t->jobctl &= ~JOBCTL_STOPPED;
wake_up_state(t, __TASK_STOPPED);
- else
+ } else
ptrace_trap_notify(t);
}
@@ -1071,15 +1075,15 @@ static inline bool legacy_queue(struct sigpending *signals, int sig)
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
-static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
- enum pid_type type, bool force)
+static int __send_signal_locked(int sig, struct kernel_siginfo *info,
+ struct task_struct *t, enum pid_type type, bool force)
{
struct sigpending *pending;
struct sigqueue *q;
int override_rlimit;
int ret = 0, result;
- assert_spin_locked(&t->sighand->siglock);
+ lockdep_assert_held(&t->sighand->siglock);
result = TRACE_SIGNAL_IGNORED;
if (!prepare_signal(sig, t, force))
@@ -1212,8 +1216,8 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
return ret;
}
-static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
- enum pid_type type)
+int send_signal_locked(int sig, struct kernel_siginfo *info,
+ struct task_struct *t, enum pid_type type)
{
/* Should SIGKILL or SIGSTOP be received by a pid namespace init? */
bool force = false;
@@ -1245,7 +1249,7 @@ static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct
force = true;
}
}
- return __send_signal(sig, info, t, type, force);
+ return __send_signal_locked(sig, info, t, type, force);
}
static void print_fatal_signal(int signr)
@@ -1281,12 +1285,6 @@ static int __init setup_print_fatal_signals(char *str)
__setup("print-fatal-signals=", setup_print_fatal_signals);
-int
-__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p)
-{
- return send_signal(sig, info, p, PIDTYPE_TGID);
-}
-
int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p,
enum pid_type type)
{
@@ -1294,7 +1292,7 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
int ret = -ESRCH;
if (lock_task_sighand(p, &flags)) {
- ret = send_signal(sig, info, p, type);
+ ret = send_signal_locked(sig, info, p, type);
unlock_task_sighand(p, &flags);
}
@@ -1347,7 +1345,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
if (action->sa.sa_handler == SIG_DFL &&
(!t->ptrace || (handler == HANDLER_EXIT)))
t->signal->flags &= ~SIGNAL_UNKILLABLE;
- ret = send_signal(sig, info, t, PIDTYPE_PID);
+ ret = send_signal_locked(sig, info, t, PIDTYPE_PID);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
return ret;
@@ -1567,7 +1565,7 @@ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr,
if (sig) {
if (lock_task_sighand(p, &flags)) {
- ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false);
+ ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false);
unlock_task_sighand(p, &flags);
} else
ret = -ESRCH;
@@ -2114,7 +2112,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
* parent's namespaces.
*/
if (valid_signal(sig) && sig)
- __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false);
+ __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false);
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);
@@ -2184,7 +2182,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
spin_lock_irqsave(&sighand->siglock, flags);
if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
!(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
- __group_send_sig_info(SIGCHLD, &info, parent);
+ send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID);
/*
* Even if SIGCHLD is not generated, we must wake up wait4 calls.
*/
@@ -2204,13 +2202,12 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
* with. If the code did not stop because the tracer is gone,
* the stop signal remains unchanged unless clear_code.
*/
-static int ptrace_stop(int exit_code, int why, int clear_code,
- unsigned long message, kernel_siginfo_t *info)
+static int ptrace_stop(int exit_code, int why, unsigned long message,
+ kernel_siginfo_t *info)
__releases(&current->sighand->siglock)
__acquires(&current->sighand->siglock)
{
bool gstop_done = false;
- bool read_code = true;
if (arch_ptrace_stop_needed()) {
/*
@@ -2227,10 +2224,16 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
}
/*
- * schedule() will not sleep if there is a pending signal that
- * can awaken the task.
+ * After this point ptrace_signal_wake_up or signal_wake_up
+ * will clear TASK_TRACED if ptrace_unlink happens or a fatal
+ * signal comes in. Handle previous ptrace_unlinks and fatal
+ * signals here to prevent ptrace_stop sleeping in schedule.
*/
+ if (!current->ptrace || __fatal_signal_pending(current))
+ return exit_code;
+
set_special_state(TASK_TRACED);
+ current->jobctl |= JOBCTL_TRACED;
/*
* We're committing to trapping. TRACED should be visible before
@@ -2276,54 +2279,33 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
spin_unlock_irq(&current->sighand->siglock);
read_lock(&tasklist_lock);
- if (likely(current->ptrace)) {
- /*
- * Notify parents of the stop.
- *
- * While ptraced, there are two parents - the ptracer and
- * the real_parent of the group_leader. The ptracer should
- * know about every stop while the real parent is only
- * interested in the completion of group stop. The states
- * for the two don't interact with each other. Notify
- * separately unless they're gonna be duplicates.
- */
+ /*
+ * Notify parents of the stop.
+ *
+ * While ptraced, there are two parents - the ptracer and
+ * the real_parent of the group_leader. The ptracer should
+ * know about every stop while the real parent is only
+ * interested in the completion of group stop. The states
+ * for the two don't interact with each other. Notify
+ * separately unless they're gonna be duplicates.
+ */
+ if (current->ptrace)
do_notify_parent_cldstop(current, true, why);
- if (gstop_done && ptrace_reparented(current))
- do_notify_parent_cldstop(current, false, why);
+ if (gstop_done && (!current->ptrace || ptrace_reparented(current)))
+ do_notify_parent_cldstop(current, false, why);
- /*
- * Don't want to allow preemption here, because
- * sys_ptrace() needs this task to be inactive.
- *
- * XXX: implement read_unlock_no_resched().
- */
- preempt_disable();
- read_unlock(&tasklist_lock);
- cgroup_enter_frozen();
- preempt_enable_no_resched();
- freezable_schedule();
- cgroup_leave_frozen(true);
- } else {
- /*
- * By the time we got the lock, our tracer went away.
- * Don't drop the lock yet, another tracer may come.
- *
- * If @gstop_done, the ptracer went away between group stop
- * completion and here. During detach, it would have set
- * JOBCTL_STOP_PENDING on us and we'll re-enter
- * TASK_STOPPED in do_signal_stop() on return, so notifying
- * the real parent of the group stop completion is enough.
- */
- if (gstop_done)
- do_notify_parent_cldstop(current, false, why);
-
- /* tasklist protects us from ptrace_freeze_traced() */
- __set_current_state(TASK_RUNNING);
- read_code = false;
- if (clear_code)
- exit_code = 0;
- read_unlock(&tasklist_lock);
- }
+ /*
+ * Don't want to allow preemption here, because
+ * sys_ptrace() needs this task to be inactive.
+ *
+ * XXX: implement read_unlock_no_resched().
+ */
+ preempt_disable();
+ read_unlock(&tasklist_lock);
+ cgroup_enter_frozen();
+ preempt_enable_no_resched();
+ freezable_schedule();
+ cgroup_leave_frozen(true);
/*
* We are back. Now reacquire the siglock before touching
@@ -2331,14 +2313,13 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
* any signal-sending on another CPU that wants to examine it.
*/
spin_lock_irq(&current->sighand->siglock);
- if (read_code)
- exit_code = current->exit_code;
+ exit_code = current->exit_code;
current->last_siginfo = NULL;
current->ptrace_message = 0;
current->exit_code = 0;
/* LISTENING can be set only during STOP traps, clear it */
- current->jobctl &= ~JOBCTL_LISTENING;
+ current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN);
/*
* Queued signals ignored us while we were stopped for tracing.
@@ -2360,7 +2341,7 @@ static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long mes
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
/* Let the debugger run. */
- return ptrace_stop(exit_code, why, 1, message, &info);
+ return ptrace_stop(exit_code, why, message, &info);
}
int ptrace_notify(int exit_code, unsigned long message)
@@ -2471,6 +2452,7 @@ static bool do_signal_stop(int signr)
if (task_participate_group_stop(current))
notify = CLD_STOPPED;
+ current->jobctl |= JOBCTL_STOPPED;
set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
@@ -2532,7 +2514,7 @@ static void do_jobctl_trap(void)
CLD_STOPPED, 0);
} else {
WARN_ON_ONCE(!signr);
- ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
+ ptrace_stop(signr, CLD_STOPPED, 0, NULL);
}
}
@@ -2585,7 +2567,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
* comment in dequeue_signal().
*/
current->jobctl |= JOBCTL_STOP_DEQUEUED;
- signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
+ signr = ptrace_stop(signr, CLD_TRAPPED, 0, info);
/* We're back. Did the debugger cancel the sig? */
if (signr == 0)
@@ -2612,7 +2594,7 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
/* If the (new) signal is now blocked, requeue it. */
if (sigismember(&current->blocked, signr) ||
fatal_signal_pending(current)) {
- send_signal(signr, info, current, type);
+ send_signal_locked(signr, info, current, type);
signr = 0;
}
@@ -4807,7 +4789,7 @@ void kdb_send_sig(struct task_struct *t, int sig)
"the deadlock.\n");
return;
}
- ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
+ ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
spin_unlock(&t->sighand->siglock);
if (ret)
kdb_printf("Fail to deliver Signal %d to process %d.\n",
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index bcac5a9043aa..f7e246336218 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
#include <linux/tsacct_kern.h>
+#include <linux/acct.h>
#include <linux/delayacct.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
@@ -153,6 +154,23 @@ static void send_cpu_listeners(struct sk_buff *skb,
up_write(&listeners->sem);
}
+static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+{
+ /* No idea if I'm allowed to access that here, now. */
+ struct file *exe_file = get_task_exe_file(tsk);
+
+ if (exe_file) {
+ /* Following cp_new_stat64() in stat.c . */
+ stats->ac_exe_dev =
+ huge_encode_dev(exe_file->f_inode->i_sb->s_dev);
+ stats->ac_exe_inode = exe_file->f_inode->i_ino;
+ fput(exe_file);
+ } else {
+ stats->ac_exe_dev = 0;
+ stats->ac_exe_inode = 0;
+ }
+}
+
static void fill_stats(struct user_namespace *user_ns,
struct pid_namespace *pid_ns,
struct task_struct *tsk, struct taskstats *stats)
@@ -175,6 +193,9 @@ static void fill_stats(struct user_namespace *user_ns,
/* fill in extended acct fields */
xacct_add_tsk(stats, tsk);
+
+ /* add executable info */
+ exe_add_tsk(stats, tsk);
}
static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
@@ -620,6 +641,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
goto err;
fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);
+ if (group_dead)
+ stats->ac_flag |= AGROUP;
/*
* Doesn't matter if tsk is the leader or the last group member leaving
@@ -665,6 +688,7 @@ static struct genl_family family __ro_after_init = {
.module = THIS_MODULE,
.ops = taskstats_ops,
.n_ops = ARRAY_SIZE(taskstats_ops),
+ .netnsok = true,
};
/* Needed early in initialization */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0a97193984db..cb925e8ef9a8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -870,7 +870,7 @@ static inline void check_dl_overrun(struct task_struct *tsk)
{
if (tsk->dl.dl_overrun) {
tsk->dl.dl_overrun = 0;
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+ send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
}
}
@@ -884,7 +884,7 @@ static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
rt ? "RT" : "CPU", hard ? "hard" : "soft",
current->comm, task_pid_nr(current));
}
- __group_send_sig_info(signo, SEND_SIG_PRIV, current);
+ send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID);
return true;
}
@@ -958,7 +958,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
trace_itimer_expire(signo == SIGPROF ?
ITIMER_PROF : ITIMER_VIRTUAL,
task_tgid(tsk), cur_time);
- __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
+ send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
}
if (it->expires && it->expires < *expires)
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d77cd8032213..0d261774d6f3 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -31,6 +31,10 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE
GCOV_PROFILE := y
endif
+# Functions in this file could be invoked from early interrupt
+# code and produce random code coverage.
+KCOV_INSTRUMENT_trace_preemptirq.o := n
+
CFLAGS_bpf_trace.o := -I$(src)
CFLAGS_trace_benchmark.o := -I$(src)
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 3fd5284f6487..218cd95bf8e4 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -30,6 +30,7 @@ int ftrace_graph_active;
/* Both enabled by default (can be cleared by function_graph tracer flags */
static bool fgraph_sleep_time = true;
+#ifdef CONFIG_DYNAMIC_FTRACE
/*
* archs can override this function if they must do something
* to enable hook for graph tracer.
@@ -47,6 +48,7 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
{
return 0;
}
+#endif
/**
* ftrace_graph_stop - set to permanently disable function graph tracing
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f8db59c3a601..e750fe141a60 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -45,6 +45,8 @@
#include "trace_output.h"
#include "trace_stat.h"
+#define FTRACE_INVALID_FUNCTION "__ftrace_invalid_address__"
+
#define FTRACE_WARN_ON(cond) \
({ \
int ___r = cond; \
@@ -119,7 +121,7 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
struct ftrace_ops global_ops;
-/* Defined by vmlinux.lds.h see the commment above arch_ftrace_ops_list_func for details */
+/* Defined by vmlinux.lds.h see the comment above arch_ftrace_ops_list_func for details */
void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
@@ -952,7 +954,6 @@ static struct tracer_stat function_stats __initdata = {
static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
- struct dentry *entry;
char *name;
int ret;
int cpu;
@@ -983,11 +984,9 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
}
}
- entry = tracefs_create_file("function_profile_enabled",
- TRACE_MODE_WRITE, d_tracer, NULL,
- &ftrace_profile_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'function_profile_enabled' entry\n");
+ trace_create_file("function_profile_enabled",
+ TRACE_MODE_WRITE, d_tracer, NULL,
+ &ftrace_profile_fops);
}
#else /* CONFIG_FUNCTION_PROFILER */
@@ -2707,18 +2706,16 @@ ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec)
* archs can override this function if they must do something
* before the modifying code is performed.
*/
-int __weak ftrace_arch_code_modify_prepare(void)
+void __weak ftrace_arch_code_modify_prepare(void)
{
- return 0;
}
/*
* archs can override this function if they must do something
* after the modifying code is performed.
*/
-int __weak ftrace_arch_code_modify_post_process(void)
+void __weak ftrace_arch_code_modify_post_process(void)
{
- return 0;
}
void ftrace_modify_all_code(int command)
@@ -2804,12 +2801,7 @@ void __weak arch_ftrace_update_code(int command)
static void ftrace_run_update_code(int command)
{
- int ret;
-
- ret = ftrace_arch_code_modify_prepare();
- FTRACE_WARN_ON(ret);
- if (ret)
- return;
+ ftrace_arch_code_modify_prepare();
/*
* By default we use stop_machine() to modify the code.
@@ -2819,8 +2811,7 @@ static void ftrace_run_update_code(int command)
*/
arch_ftrace_update_code(command);
- ret = ftrace_arch_code_modify_post_process();
- FTRACE_WARN_ON(ret);
+ ftrace_arch_code_modify_post_process();
}
static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -3631,6 +3622,105 @@ static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops,
seq_printf(m, " ->%pS", ptr);
}
+#ifdef FTRACE_MCOUNT_MAX_OFFSET
+/*
+ * Weak functions can still have an mcount/fentry that is saved in
+ * the __mcount_loc section. These can be detected by having a
+ * symbol offset of greater than FTRACE_MCOUNT_MAX_OFFSET, as the
+ * symbol found by kallsyms is not the function that the mcount/fentry
+ * is part of. The offset is much greater in these cases.
+ *
+ * Test the record to make sure that the ip points to a valid kallsyms
+ * and if not, mark it disabled.
+ */
+static int test_for_valid_rec(struct dyn_ftrace *rec)
+{
+ char str[KSYM_SYMBOL_LEN];
+ unsigned long offset;
+ const char *ret;
+
+ ret = kallsyms_lookup(rec->ip, NULL, &offset, NULL, str);
+
+ /* Weak functions can cause invalid addresses */
+ if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) {
+ rec->flags |= FTRACE_FL_DISABLED;
+ return 0;
+ }
+ return 1;
+}
+
+static struct workqueue_struct *ftrace_check_wq __initdata;
+static struct work_struct ftrace_check_work __initdata;
+
+/*
+ * Scan all the mcount/fentry entries to make sure they are valid.
+ */
+static __init void ftrace_check_work_func(struct work_struct *work)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+ test_for_valid_rec(rec);
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
+}
+
+/*
+ * Start the scan for invalid mcount/fentry records on a dedicated unbound
+ * workqueue; ftrace_check_sync() destroys that workqueue later on.
+ *
+ * If the workqueue cannot be allocated the scan is run directly instead,
+ * because queue_work() must not be handed a NULL workqueue.
+ *
+ * Always returns 0.
+ */
+static int __init ftrace_check_for_weak_functions(void)
+{
+	INIT_WORK(&ftrace_check_work, ftrace_check_work_func);
+
+	ftrace_check_wq = alloc_workqueue("ftrace_check_wq", WQ_UNBOUND, 0);
+	if (!ftrace_check_wq) {
+		/* No workqueue available: do the scan synchronously. */
+		ftrace_check_work_func(&ftrace_check_work);
+		return 0;
+	}
+
+	queue_work(ftrace_check_wq, &ftrace_check_work);
+	return 0;
+}
+
+static int __init ftrace_check_sync(void)
+{
+ /* Make sure the ftrace_check updates are finished */
+ if (ftrace_check_wq)
+ destroy_workqueue(ftrace_check_wq);
+ return 0;
+}
+
+late_initcall_sync(ftrace_check_sync);
+subsys_initcall(ftrace_check_for_weak_functions);
+
+/*
+ * Print the symbol that @ip resolves to into @m, followed by " [module]"
+ * when a module name was reported by the lookup.
+ *
+ * If the lookup fails or the offset into the symbol is greater than
+ * FTRACE_MCOUNT_MAX_OFFSET, a FTRACE_INVALID_FUNCTION placeholder with the
+ * offset appended is printed instead.
+ *
+ * Returns 0 for a valid symbol, -1 when the placeholder was printed.
+ */
+static int print_rec(struct seq_file *m, unsigned long ip)
+{
+	/*
+	 * Both values are read below even when kallsyms_lookup() returns
+	 * NULL, so give them defined contents rather than relying on a
+	 * failed lookup having written them.
+	 */
+	unsigned long offset = 0;
+	char str[KSYM_SYMBOL_LEN];
+	char *modname = NULL;
+	const char *ret;
+
+	ret = kallsyms_lookup(ip, NULL, &offset, &modname, str);
+	/* Weak functions can cause invalid addresses */
+	if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) {
+		snprintf(str, KSYM_SYMBOL_LEN, "%s_%ld",
+			 FTRACE_INVALID_FUNCTION, offset);
+		ret = NULL;
+	}
+
+	seq_puts(m, str);
+	if (modname)
+		seq_printf(m, " [%s]", modname);
+	return ret == NULL ? -1 : 0;
+}
+#else
+static inline int test_for_valid_rec(struct dyn_ftrace *rec)
+{
+ return 1;
+}
+
+static inline int print_rec(struct seq_file *m, unsigned long ip)
+{
+ seq_printf(m, "%ps", (void *)ip);
+ return 0;
+}
+#endif
+
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_iterator *iter = m->private;
@@ -3655,7 +3745,13 @@ static int t_show(struct seq_file *m, void *v)
if (!rec)
return 0;
- seq_printf(m, "%ps", (void *)rec->ip);
+ if (print_rec(m, rec->ip)) {
+ /* This should only happen when a rec is disabled */
+ WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
+ seq_putc(m, '\n');
+ return 0;
+ }
+
if (iter->flags & FTRACE_ITER_ENABLED) {
struct ftrace_ops *ops;
@@ -3973,6 +4069,24 @@ add_rec_by_index(struct ftrace_hash *hash, struct ftrace_glob *func_g,
return 0;
}
+#ifdef FTRACE_MCOUNT_MAX_OFFSET
+/*
+ * Resolve @ip into a symbol name (@str) and module name (@modname).
+ *
+ * Returns 0 when @ip maps to a symbol and lies within
+ * FTRACE_MCOUNT_MAX_OFFSET of its start, -1 otherwise. On -1 the caller
+ * must not rely on the contents of @modname.
+ */
+static int lookup_ip(unsigned long ip, char **modname, char *str)
+{
+	unsigned long offset;
+
+	/*
+	 * A failed lookup may leave offset unset; treat it as an invalid
+	 * address, the same way print_rec() and test_for_valid_rec() do.
+	 */
+	if (!kallsyms_lookup(ip, NULL, &offset, modname, str))
+		return -1;
+	if (offset > FTRACE_MCOUNT_MAX_OFFSET)
+		return -1;
+	return 0;
+}
+#else
+/*
+ * Variant used when FTRACE_MCOUNT_MAX_OFFSET is not defined: look up @ip
+ * into @str / @modname without any offset check. Always returns 0; the
+ * result of kallsyms_lookup() is not examined.
+ */
+static int lookup_ip(unsigned long ip, char **modname, char *str)
+{
+ kallsyms_lookup(ip, NULL, NULL, modname, str);
+ return 0;
+}
+#endif
+
static int
ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g,
struct ftrace_glob *mod_g, int exclude_mod)
@@ -3980,7 +4094,12 @@ ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g,
char str[KSYM_SYMBOL_LEN];
char *modname;
- kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
+ if (lookup_ip(rec->ip, &modname, str)) {
+ /* This should only happen when a rec is disabled */
+ WARN_ON_ONCE(system_state == SYSTEM_RUNNING &&
+ !(rec->flags & FTRACE_FL_DISABLED));
+ return 0;
+ }
if (mod_g) {
int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0;
@@ -4431,7 +4550,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
* @ip: The instruction pointer address to remove the data from
*
* Returns the data if it is found, otherwise NULL.
- * Note, if the data pointer is used as the data itself, (see
+ * Note, if the data pointer is used as the data itself, (see
* ftrace_func_mapper_find_ip(), then the return value may be meaningless,
* if the data pointer was set to zero.
*/
@@ -4526,8 +4645,8 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
struct ftrace_probe_ops *probe_ops,
void *data)
{
+ struct ftrace_func_probe *probe = NULL, *iter;
struct ftrace_func_entry *entry;
- struct ftrace_func_probe *probe;
struct ftrace_hash **orig_hash;
struct ftrace_hash *old_hash;
struct ftrace_hash *hash;
@@ -4546,11 +4665,13 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
mutex_lock(&ftrace_lock);
/* Check if the probe_ops is already registered */
- list_for_each_entry(probe, &tr->func_probes, list) {
- if (probe->probe_ops == probe_ops)
+ list_for_each_entry(iter, &tr->func_probes, list) {
+ if (iter->probe_ops == probe_ops) {
+ probe = iter;
break;
+ }
}
- if (&probe->list == &tr->func_probes) {
+ if (!probe) {
probe = kzalloc(sizeof(*probe), GFP_KERNEL);
if (!probe) {
mutex_unlock(&ftrace_lock);
@@ -4668,9 +4789,9 @@ int
unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
struct ftrace_probe_ops *probe_ops)
{
+ struct ftrace_func_probe *probe = NULL, *iter;
struct ftrace_ops_hash old_hash_ops;
struct ftrace_func_entry *entry;
- struct ftrace_func_probe *probe;
struct ftrace_glob func_g;
struct ftrace_hash **orig_hash;
struct ftrace_hash *old_hash;
@@ -4698,11 +4819,13 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
mutex_lock(&ftrace_lock);
/* Check if the probe_ops is already registered */
- list_for_each_entry(probe, &tr->func_probes, list) {
- if (probe->probe_ops == probe_ops)
+ list_for_each_entry(iter, &tr->func_probes, list) {
+ if (iter->probe_ops == probe_ops) {
+ probe = iter;
break;
+ }
}
- if (&probe->list == &tr->func_probes)
+ if (!probe)
goto err_unlock_ftrace;
ret = -EINVAL;
@@ -5161,8 +5284,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
goto out_unlock;
ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
- if (ret)
- remove_hash_entry(direct_functions, entry);
if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
ret = register_ftrace_function(&direct_ops);
@@ -5171,6 +5292,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
}
if (ret) {
+ remove_hash_entry(direct_functions, entry);
kfree(entry);
if (!direct->count) {
list_del_rcu(&direct->next);
@@ -6793,6 +6915,13 @@ void ftrace_module_enable(struct module *mod)
!within_module_init(rec->ip, mod))
break;
+ /* Weak functions should still be ignored */
+ if (!test_for_valid_rec(rec)) {
+ /* Clear all other flags. Should not be enabled anyway */
+ rec->flags = FTRACE_FL_DISABLED;
+ continue;
+ }
+
cnt = 0;
/*
@@ -6829,11 +6958,16 @@ void ftrace_module_enable(struct module *mod)
void ftrace_module_init(struct module *mod)
{
+ int ret;
+
if (ftrace_disabled || !mod->num_ftrace_callsites)
return;
- ftrace_process_locs(mod, mod->ftrace_callsites,
- mod->ftrace_callsites + mod->num_ftrace_callsites);
+ ret = ftrace_process_locs(mod, mod->ftrace_callsites,
+ mod->ftrace_callsites + mod->num_ftrace_callsites);
+ if (ret)
+ pr_warn("ftrace: failed to allocate entries for module '%s' functions\n",
+ mod->name);
}
static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map,
@@ -7166,15 +7300,19 @@ void __init ftrace_init(void)
pr_info("ftrace: allocating %ld entries in %ld pages\n",
count, count / ENTRIES_PER_PAGE + 1);
- last_ftrace_enabled = ftrace_enabled = 1;
-
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
__stop_mcount_loc);
+ if (ret) {
+ pr_warn("ftrace: failed to allocate entries for functions\n");
+ goto failed;
+ }
pr_info("ftrace: allocated %ld pages with %ld groups\n",
ftrace_number_of_pages, ftrace_number_of_groups);
+ last_ftrace_enabled = ftrace_enabled = 1;
+
set_ftrace_early_filters();
return;
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index a2ef1d18126a..95106d02b32d 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -118,9 +118,9 @@ static inline unsigned int pid_join(unsigned int upper1,
/**
* trace_pid_list_is_set - test if the pid is set in the list
* @pid_list: The pid list to test
- * @pid: The pid to to see if set in the list.
+ * @pid: The pid to see if set in the list.
*
- * Tests if @pid is is set in the @pid_list. This is usually called
+ * Tests if @pid is set in the @pid_list. This is usually called
* from the scheduler when a task is scheduled. Its pid is checked
* if it should be traced or not.
*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05dfc7a12d3d..d59b6a328b7f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -29,6 +29,14 @@
#include <asm/local.h>
+/*
+ * The "absolute" timestamp in the buffer is only 59 bits.
+ * If a clock has the 5 MSBs set, it needs to be saved and
+ * reinserted.
+ */
+#define TS_MSB (0xf8ULL << 56)
+#define ABS_TS_MASK (~TS_MSB)
+
static void update_pages_handler(struct work_struct *work);
/*
@@ -468,6 +476,7 @@ struct rb_time_struct {
local_t cnt;
local_t top;
local_t bottom;
+ local_t msb;
};
#else
#include <asm/local64.h>
@@ -569,7 +578,6 @@ struct ring_buffer_iter {
* For the ring buffer, 64 bit required operations for the time is
* the following:
*
- * - Only need 59 bits (uses 60 to make it even).
* - Reads may fail if it interrupted a modification of the time stamp.
* It will succeed if it did not interrupt another write even if
* the read itself is interrupted by a write.
@@ -594,6 +602,7 @@ struct ring_buffer_iter {
*/
#define RB_TIME_SHIFT 30
#define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1)
+#define RB_TIME_MSB_SHIFT 60
static inline int rb_time_cnt(unsigned long val)
{
@@ -613,7 +622,7 @@ static inline u64 rb_time_val(unsigned long top, unsigned long bottom)
static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
{
- unsigned long top, bottom;
+ unsigned long top, bottom, msb;
unsigned long c;
/*
@@ -625,6 +634,7 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
c = local_read(&t->cnt);
top = local_read(&t->top);
bottom = local_read(&t->bottom);
+ msb = local_read(&t->msb);
} while (c != local_read(&t->cnt));
*cnt = rb_time_cnt(top);
@@ -633,7 +643,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
if (*cnt != rb_time_cnt(bottom))
return false;
- *ret = rb_time_val(top, bottom);
+ /* The shift to msb will lose its cnt bits */
+ *ret = rb_time_val(top, bottom) | ((u64)msb << RB_TIME_MSB_SHIFT);
return true;
}
@@ -649,10 +660,12 @@ static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt
return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT);
}
-static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom)
+static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom,
+ unsigned long *msb)
{
*top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK);
*bottom = (unsigned long)(val & RB_TIME_VAL_MASK);
+ *msb = (unsigned long)(val >> RB_TIME_MSB_SHIFT);
}
static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt)
@@ -663,15 +676,16 @@ static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long
static void rb_time_set(rb_time_t *t, u64 val)
{
- unsigned long cnt, top, bottom;
+ unsigned long cnt, top, bottom, msb;
- rb_time_split(val, &top, &bottom);
+ rb_time_split(val, &top, &bottom, &msb);
/* Writes always succeed with a valid number even if it gets interrupted. */
do {
cnt = local_inc_return(&t->cnt);
rb_time_val_set(&t->top, top, cnt);
rb_time_val_set(&t->bottom, bottom, cnt);
+ rb_time_val_set(&t->msb, val >> RB_TIME_MSB_SHIFT, cnt);
} while (cnt != local_read(&t->cnt));
}
@@ -686,8 +700,8 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
- unsigned long cnt, top, bottom;
- unsigned long cnt2, top2, bottom2;
+ unsigned long cnt, top, bottom, msb;
+ unsigned long cnt2, top2, bottom2, msb2;
u64 val;
/* The cmpxchg always fails if it interrupted an update */
@@ -703,16 +717,18 @@ static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
cnt2 = cnt + 1;
- rb_time_split(val, &top, &bottom);
+ rb_time_split(val, &top, &bottom, &msb);
top = rb_time_val_cnt(top, cnt);
bottom = rb_time_val_cnt(bottom, cnt);
- rb_time_split(set, &top2, &bottom2);
+ rb_time_split(set, &top2, &bottom2, &msb2);
top2 = rb_time_val_cnt(top2, cnt2);
bottom2 = rb_time_val_cnt(bottom2, cnt2);
if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
return false;
+ if (!rb_time_read_cmpxchg(&t->msb, msb, msb2))
+ return false;
if (!rb_time_read_cmpxchg(&t->top, top, top2))
return false;
if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
@@ -783,6 +799,24 @@ static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
}
#endif
+/*
+ * An absolute time stamp stored in the buffer has lost its 5 most
+ * significant bits, which some clocks need. rb_fix_abs_ts() restores
+ * them by borrowing the 5 MSBs of @save_ts, an earlier full time stamp.
+ * If the result ends up smaller than @save_ts, the 5 MSB field must have
+ * been incremented since @save_ts was taken, so it is bumped by one.
+ *
+ * When @save_ts has none of those bits set, @abs is returned untouched.
+ */
+static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts)
+{
+	u64 msb = save_ts & TS_MSB;
+
+	/* Nothing was dropped if the reference stamp has no MSBs set */
+	if (!msb)
+		return abs;
+
+	abs |= msb;
+	/* Check for overflow */
+	if (unlikely(abs < save_ts))
+		abs += 1ULL << 59;
+
+	return abs;
+}
static inline u64 rb_time_stamp(struct trace_buffer *buffer);
@@ -811,8 +845,10 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
u64 ts;
/* If the event includes an absolute time, then just use that */
- if (event->type_len == RINGBUF_TYPE_TIME_STAMP)
- return rb_event_time_stamp(event);
+ if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+ ts = rb_event_time_stamp(event);
+ return rb_fix_abs_ts(ts, cpu_buffer->tail_page->page->time_stamp);
+ }
nest = local_read(&cpu_buffer->committing);
verify_event(cpu_buffer, event);
@@ -2754,8 +2790,15 @@ static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
(RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE);
if (unlikely(info->delta > (1ULL << 59))) {
+ /*
+ * Some timers can use more than 59 bits, and when a timestamp
+ * is added to the buffer, it will lose those bits.
+ */
+ if (abs && (info->ts & TS_MSB)) {
+ info->delta &= ABS_TS_MASK;
+
/* did the clock go backwards */
- if (info->before == info->after && info->before > info->ts) {
+ } else if (info->before == info->after && info->before > info->ts) {
/* not interrupted */
static int once;
@@ -3304,7 +3347,7 @@ static void dump_buffer_page(struct buffer_data_page *bpage,
case RINGBUF_TYPE_TIME_STAMP:
delta = rb_event_time_stamp(event);
- ts = delta;
+ ts = rb_fix_abs_ts(delta, ts);
pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
break;
@@ -3380,7 +3423,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
case RINGBUF_TYPE_TIME_STAMP:
delta = rb_event_time_stamp(event);
- ts = delta;
+ ts = rb_fix_abs_ts(delta, ts);
break;
case RINGBUF_TYPE_PADDING:
@@ -4367,6 +4410,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
case RINGBUF_TYPE_TIME_STAMP:
delta = rb_event_time_stamp(event);
+ delta = rb_fix_abs_ts(delta, cpu_buffer->read_stamp);
cpu_buffer->read_stamp = delta;
return;
@@ -4397,6 +4441,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
case RINGBUF_TYPE_TIME_STAMP:
delta = rb_event_time_stamp(event);
+ delta = rb_fix_abs_ts(delta, iter->read_stamp);
iter->read_stamp = delta;
return;
@@ -4650,6 +4695,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
case RINGBUF_TYPE_TIME_STAMP:
if (ts) {
*ts = rb_event_time_stamp(event);
+ *ts = rb_fix_abs_ts(*ts, reader->page->time_stamp);
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
}
@@ -4741,6 +4787,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
case RINGBUF_TYPE_TIME_STAMP:
if (ts) {
*ts = rb_event_time_stamp(event);
+ *ts = rb_fix_abs_ts(*ts, iter->head_page->page->time_stamp);
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
}
@@ -6011,10 +6058,10 @@ static __init int test_ringbuffer(void)
pr_info(" total events: %ld\n", total_lost + total_read);
pr_info(" recorded len bytes: %ld\n", total_len);
pr_info(" recorded size bytes: %ld\n", total_size);
- if (total_lost)
+ if (total_lost) {
pr_info(" With dropped events, record len and size may not match\n"
" alloced and written from above\n");
- if (!total_lost) {
+ } else {
if (RB_WARN_ON(buffer, total_len != total_alloc ||
total_size != total_written))
break;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 124f1897fd56..2c95992e2c71 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -721,13 +721,16 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
pos = 0;
ret = trace_get_user(&parser, ubuf, cnt, &pos);
- if (ret < 0 || !trace_parser_loaded(&parser))
+ if (ret < 0)
break;
read += ret;
ubuf += ret;
cnt -= ret;
+ if (!trace_parser_loaded(&parser))
+ break;
+
ret = -EINVAL;
if (kstrtoul(parser.buffer, 0, &val))
break;
@@ -753,7 +756,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
if (!nr_pids) {
/* Cleared the list of pids */
trace_pid_list_free(pid_list);
- read = ret;
pid_list = NULL;
}
@@ -1174,7 +1176,7 @@ void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/**
- * tracing_snapshot_cond_data - get the user data associated with a snapshot
+ * tracing_cond_snapshot_data - get the user data associated with a snapshot
* @tr: The tracing instance
*
* When the user enables a conditional snapshot using
@@ -1542,6 +1544,7 @@ static struct {
{ ktime_get_mono_fast_ns, "mono", 1 },
{ ktime_get_raw_fast_ns, "mono_raw", 1 },
{ ktime_get_boot_fast_ns, "boot", 1 },
+ { ktime_get_tai_fast_ns, "tai", 1 },
ARCH_TRACE_CLOCKS
};
@@ -2835,7 +2838,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
-static DEFINE_SPINLOCK(tracepoint_iter_lock);
+static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
static DEFINE_MUTEX(tracepoint_printk_mutex);
static void output_printk(struct trace_event_buffer *fbuffer)
@@ -2863,14 +2866,14 @@ static void output_printk(struct trace_event_buffer *fbuffer)
event = &fbuffer->trace_file->event_call->event;
- spin_lock_irqsave(&tracepoint_iter_lock, flags);
+ raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
trace_seq_init(&iter->seq);
iter->ent = fbuffer->entry;
event_call->event.funcs->trace(iter, 0, event);
trace_seq_putc(&iter->seq, 0);
printk("%s", iter->seq.buffer);
- spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+ raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
@@ -4249,7 +4252,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
- const char *space = " ";
+ static const char space[] = " ";
int prec = tgid ? 12 : 2;
print_event_info(buf, m);
@@ -4273,9 +4276,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
struct tracer *type = iter->trace;
unsigned long entries;
unsigned long total;
- const char *name = "preemption";
-
- name = type->name;
+ const char *name = type->name;
get_total_entries(buf, &total, &entries);
@@ -5469,7 +5470,7 @@ static const char readme_msg[] =
" error_log\t- error log for failed commands (that support it)\n"
" buffer_size_kb\t- view and modify size of per cpu buffer\n"
" buffer_total_size_kb - view total size of all cpu buffers\n\n"
- " trace_clock\t\t-change the clock used to order events\n"
+ " trace_clock\t\t- change the clock used to order events\n"
" local: Per cpu clock but may not be synced across CPUs\n"
" global: Synced across CPUs but slows tracing down.\n"
" counter: Not a clock, but just an increment\n"
@@ -5478,7 +5479,7 @@ static const char readme_msg[] =
#ifdef CONFIG_X86_64
" x86-tsc: TSC cycle counter\n"
#endif
- "\n timestamp_mode\t-view the mode used to timestamp events\n"
+ "\n timestamp_mode\t- view the mode used to timestamp events\n"
" delta: Delta difference against a buffer-wide timestamp\n"
" absolute: Absolute (standalone) timestamp\n"
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
@@ -6326,12 +6327,18 @@ static void tracing_set_nop(struct trace_array *tr)
tr->current_trace = &nop_trace;
}
+static bool tracer_options_updated;
+
static void add_tracer_options(struct trace_array *tr, struct tracer *t)
{
/* Only enable if the directory has been created already. */
if (!tr->dir)
return;
+ /* Only create trace option files after update_tracer_options finishes */
+ if (!tracer_options_updated)
+ return;
+
create_trace_option_files(tr, t);
}
@@ -6448,7 +6455,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
{
struct trace_array *tr = filp->private_data;
char buf[MAX_TRACER_SIZE+1];
- int i;
+ char *name;
size_t ret;
int err;
@@ -6462,11 +6469,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
buf[cnt] = 0;
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ name = strim(buf);
- err = tracing_set_tracer(tr, buf);
+ err = tracing_set_tracer(tr, name);
if (err)
return err;
@@ -9170,6 +9175,7 @@ static void __update_tracer_options(struct trace_array *tr)
static void update_tracer_options(struct trace_array *tr)
{
mutex_lock(&trace_types_lock);
+ tracer_options_updated = true;
__update_tracer_options(tr);
mutex_unlock(&trace_types_lock);
}
@@ -9602,6 +9608,7 @@ extern struct trace_eval_map *__stop_ftrace_eval_maps[];
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
+static struct work_struct tracerfs_init_work __initdata;
static void __init eval_map_work_func(struct work_struct *work)
{
@@ -9627,6 +9634,8 @@ static int __init trace_eval_init(void)
return 0;
}
+subsys_initcall(trace_eval_init);
+
static int __init trace_eval_sync(void)
{
/* Make sure the eval map updates are finished */
@@ -9709,15 +9718,8 @@ static struct notifier_block trace_module_nb = {
};
#endif /* CONFIG_MODULES */
-static __init int tracer_init_tracefs(void)
+static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{
- int ret;
-
- trace_access_lock_init();
-
- ret = tracing_init_dentry();
- if (ret)
- return 0;
event_trace_init();
@@ -9739,8 +9741,6 @@ static __init int tracer_init_tracefs(void)
trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
NULL, &tracing_saved_tgids_fops);
- trace_eval_init();
-
trace_create_eval_file(NULL);
#ifdef CONFIG_MODULES
@@ -9755,6 +9755,24 @@ static __init int tracer_init_tracefs(void)
create_trace_instances(NULL);
update_tracer_options(&global_trace);
+}
+
+static __init int tracer_init_tracefs(void)
+{
+ int ret;
+
+ trace_access_lock_init();
+
+ ret = tracing_init_dentry();
+ if (ret)
+ return 0;
+
+ if (eval_map_wq) {
+ INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
+ queue_work(eval_map_wq, &tracerfs_init_work);
+ } else {
+ tracer_init_tracefs_work_func(NULL);
+ }
return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 07d990270e2a..ff816fb41e48 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1573,13 +1573,12 @@ struct enable_trigger_data {
};
extern int event_enable_trigger_print(struct seq_file *m,
- struct event_trigger_ops *ops,
- struct event_trigger_data *data);
-extern void event_enable_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data);
+extern void event_enable_trigger_free(struct event_trigger_data *data);
extern int event_enable_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+ char *glob, char *cmd,
+ char *param_and_filter);
extern int event_enable_register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file);
@@ -1587,8 +1586,7 @@ extern void event_enable_unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file);
extern void trigger_data_free(struct event_trigger_data *data);
-extern int event_trigger_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data);
+extern int event_trigger_init(struct event_trigger_data *data);
extern int trace_event_trigger_enable_disable(struct trace_event_file *file,
int trigger_enable);
extern void update_cond_flag(struct trace_event_file *file);
@@ -1629,10 +1627,11 @@ extern void event_trigger_reset_filter(struct event_command *cmd_ops,
extern int event_trigger_register(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob,
- char *cmd,
- char *trigger,
- struct event_trigger_data *trigger_data,
- int *n_registered);
+ struct event_trigger_data *trigger_data);
+extern void event_trigger_unregister(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ struct event_trigger_data *trigger_data);
/**
* struct event_trigger_ops - callbacks for trace event triggers
@@ -1686,12 +1685,9 @@ struct event_trigger_ops {
struct trace_buffer *buffer,
void *rec,
struct ring_buffer_event *rbe);
- int (*init)(struct event_trigger_ops *ops,
- struct event_trigger_data *data);
- void (*free)(struct event_trigger_ops *ops,
- struct event_trigger_data *data);
+ int (*init)(struct event_trigger_data *data);
+ void (*free)(struct event_trigger_data *data);
int (*print)(struct seq_file *m,
- struct event_trigger_ops *ops,
struct event_trigger_data *data);
};
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 0580287d7a0d..778200dd8ede 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -300,7 +300,7 @@ trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
{
struct xbc_node *node;
const char *p, *handler;
- int ret;
+ int ret = 0;
handler = xbc_node_get_data(hnode);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index e34e8182ee4b..076b447a1b88 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -255,19 +255,14 @@ static const struct file_operations dynamic_events_ops = {
/* Make a tracefs interface for controlling dynamic events */
static __init int init_dynamic_event(void)
{
- struct dentry *entry;
int ret;
ret = tracing_init_dentry();
if (ret)
return 0;
- entry = tracefs_create_file("dynamic_events", TRACE_MODE_WRITE, NULL,
- NULL, &dynamic_events_ops);
-
- /* Event list interface */
- if (!entry)
- pr_warn("Could not create tracefs 'dynamic_events' entry\n");
+ trace_create_file("dynamic_events", TRACE_MODE_WRITE, NULL,
+ NULL, &dynamic_events_ops);
return 0;
}
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 541aa13581b9..7d4478525c66 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -511,20 +511,17 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
* functions are just stubs to fulfill what is needed to use the trigger
* infrastructure.
*/
-static int eprobe_trigger_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static int eprobe_trigger_init(struct event_trigger_data *data)
{
return 0;
}
-static void eprobe_trigger_free(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static void eprobe_trigger_free(struct event_trigger_data *data)
{
}
static int eprobe_trigger_print(struct seq_file *m,
- struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
/* Do not print eprobe event triggers */
@@ -549,7 +546,8 @@ static struct event_trigger_ops eprobe_trigger_ops = {
static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+ char *glob, char *cmd,
+ char *param_and_filter)
{
return -1;
}
@@ -650,7 +648,7 @@ static struct trace_event_functions eprobe_funcs = {
static int disable_eprobe(struct trace_eprobe *ep,
struct trace_array *tr)
{
- struct event_trigger_data *trigger;
+ struct event_trigger_data *trigger = NULL, *iter;
struct trace_event_file *file;
struct eprobe_data *edata;
@@ -658,14 +656,16 @@ static int disable_eprobe(struct trace_eprobe *ep,
if (!file)
return -ENOENT;
- list_for_each_entry(trigger, &file->triggers, list) {
- if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
+ list_for_each_entry(iter, &file->triggers, list) {
+ if (!(iter->flags & EVENT_TRIGGER_FL_PROBE))
continue;
- edata = trigger->private_data;
- if (edata->ep == ep)
+ edata = iter->private_data;
+ if (edata->ep == ep) {
+ trigger = iter;
break;
+ }
}
- if (list_entry_is_head(trigger, &file->triggers, list))
+ if (!trigger)
return -ENODEV;
list_del_rcu(&trigger->list);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f97de82d1342..181f08186d32 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -392,12 +392,6 @@ static void test_event_printk(struct trace_event_call *call)
if (!(dereference_flags & (1ULL << arg)))
goto next_arg;
- /* Check for __get_sockaddr */;
- if (str_has_prefix(fmt + i, "__get_sockaddr(")) {
- dereference_flags &= ~(1ULL << arg);
- goto next_arg;
- }
-
/* Find the REC-> in the argument */
c = strchr(fmt + i, ',');
r = strstr(fmt + i, "REC->");
@@ -413,7 +407,14 @@ static void test_event_printk(struct trace_event_call *call)
a = strchr(fmt + i, '&');
if ((a && (a < r)) || test_field(r, call))
dereference_flags &= ~(1ULL << arg);
+ } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
+ (!c || r < c)) {
+ dereference_flags &= ~(1ULL << arg);
+ } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
+ (!c || r < c)) {
+ dereference_flags &= ~(1ULL << arg);
}
+
next_arg:
i--;
arg++;
@@ -1723,9 +1724,9 @@ static LIST_HEAD(event_subsystems);
static int subsystem_open(struct inode *inode, struct file *filp)
{
+ struct trace_subsystem_dir *dir = NULL, *iter_dir;
+ struct trace_array *tr = NULL, *iter_tr;
struct event_subsystem *system = NULL;
- struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
- struct trace_array *tr;
int ret;
if (tracing_is_disabled())
@@ -1734,10 +1735,12 @@ static int subsystem_open(struct inode *inode, struct file *filp)
/* Make sure the system still exists */
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- list_for_each_entry(dir, &tr->systems, list) {
- if (dir == inode->i_private) {
+ list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
+ list_for_each_entry(iter_dir, &iter_tr->systems, list) {
+ if (iter_dir == inode->i_private) {
/* Don't open systems with no events */
+ tr = iter_tr;
+ dir = iter_dir;
if (dir->nr_events) {
__get_system_dir(dir);
system = dir->subsystem;
@@ -1753,9 +1756,6 @@ static int subsystem_open(struct inode *inode, struct file *filp)
if (!system)
return -ENODEV;
- /* Some versions of gcc think dir can be uninitialized here */
- WARN_ON(!dir);
-
/* Still need to increment the ref count of the system */
if (trace_array_get(tr) < 0) {
put_system(dir);
@@ -2280,8 +2280,8 @@ static struct dentry *
event_subsystem_dir(struct trace_array *tr, const char *name,
struct trace_event_file *file, struct dentry *parent)
{
+ struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
- struct event_subsystem *system;
struct dentry *entry;
/* First see if we did not already create this dir */
@@ -2295,13 +2295,13 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
/* Now see if the system itself exists. */
- list_for_each_entry(system, &event_subsystems, list) {
- if (strcmp(system->name, name) == 0)
+ system = NULL;
+ list_for_each_entry(iter, &event_subsystems, list) {
+ if (strcmp(iter->name, name) == 0) {
+ system = iter;
break;
+ }
}
- /* Reset system variable when not found */
- if (&system->list == &event_subsystems)
- system = NULL;
dir = kmalloc(sizeof(*dir), GFP_KERNEL);
if (!dir)
@@ -3546,12 +3546,10 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
struct dentry *d_events;
struct dentry *entry;
- entry = tracefs_create_file("set_event", TRACE_MODE_WRITE, parent,
- tr, &ftrace_set_event_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'set_event' entry\n");
+ entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_fops);
+ if (!entry)
return -ENOMEM;
- }
d_events = tracefs_create_dir("events", parent);
if (!d_events) {
@@ -3566,16 +3564,12 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
/* There are not as crucial, just warn if they are not created */
- entry = tracefs_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
- tr, &ftrace_set_event_pid_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'set_event_pid' entry\n");
+ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_pid_fops);
- entry = tracefs_create_file("set_event_notrace_pid",
- TRACE_MODE_WRITE, parent, tr,
- &ftrace_set_event_notrace_pid_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
+ trace_create_file("set_event_notrace_pid",
+ TRACE_MODE_WRITE, parent, tr,
+ &ftrace_set_event_notrace_pid_fops);
/* ring buffer internal formats */
trace_create_file("header_page", TRACE_MODE_READ, d_events,
@@ -3790,17 +3784,14 @@ static __init int event_trace_init_fields(void)
__init int event_trace_init(void)
{
struct trace_array *tr;
- struct dentry *entry;
int ret;
tr = top_trace_array();
if (!tr)
return -ENODEV;
- entry = tracefs_create_file("available_events", TRACE_MODE_READ,
- NULL, tr, &ftrace_avail_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'available_events' entry\n");
+ trace_create_file("available_events", TRACE_MODE_READ,
+ NULL, tr, &ftrace_avail_fops);
ret = early_event_add_tracer(NULL, tr);
if (ret)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index b458a9afa2c0..4b1057ab9d96 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1816,7 +1816,7 @@ static void create_filter_finish(struct filter_parse_error *pe)
* create_filter - create a filter for a trace_event_call
* @tr: the trace array associated with these events
* @call: trace_event_call to create a filter for
- * @filter_str: filter string
+ * @filter_string: filter string
* @set_str: remember @filter_string and enable detailed error in filter
* @filterp: out param for created filter (always updated on return)
* Must be a pointer that references a NULL pointer.
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 44db5ba9cabb..48e82e141d54 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2093,8 +2093,11 @@ static int init_var_ref(struct hist_field *ref_field,
return err;
free:
kfree(ref_field->system);
+ ref_field->system = NULL;
kfree(ref_field->event_name);
+ ref_field->event_name = NULL;
kfree(ref_field->name);
+ ref_field->name = NULL;
goto out;
}
@@ -2785,7 +2788,8 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data,
static struct event_command trigger_hist_cmd;
static int event_hist_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+ char *glob, char *cmd,
+ char *param_and_filter);
static bool compatible_keys(struct hist_trigger_data *target_hist_data,
struct hist_trigger_data *hist_data,
@@ -4161,7 +4165,7 @@ static int create_val_field(struct hist_trigger_data *hist_data,
return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
}
-static const char *no_comm = "(no comm)";
+static const char no_comm[] = "(no comm)";
static u64 hist_field_execname(struct hist_field *hist_field,
struct tracing_map_elt *elt,
@@ -5252,7 +5256,7 @@ static void hist_trigger_show(struct seq_file *m,
seq_puts(m, "\n\n");
seq_puts(m, "# event histogram\n#\n# trigger info: ");
- data->ops->print(m, data->ops, data);
+ data->ops->print(m, data);
seq_puts(m, "#\n\n");
hist_data = data->private_data;
@@ -5484,7 +5488,7 @@ static void hist_trigger_debug_show(struct seq_file *m,
seq_puts(m, "\n\n");
seq_puts(m, "# event histogram\n#\n# trigger info: ");
- data->ops->print(m, data->ops, data);
+ data->ops->print(m, data);
seq_puts(m, "#\n\n");
hist_data = data->private_data;
@@ -5621,7 +5625,6 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
}
static int event_hist_trigger_print(struct seq_file *m,
- struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
@@ -5729,8 +5732,7 @@ static int event_hist_trigger_print(struct seq_file *m,
return 0;
}
-static int event_hist_trigger_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static int event_hist_trigger_init(struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
@@ -5758,8 +5760,7 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
}
}
-static void event_hist_trigger_free(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static void event_hist_trigger_free(struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
@@ -5788,25 +5789,23 @@ static struct event_trigger_ops event_hist_trigger_ops = {
.free = event_hist_trigger_free,
};
-static int event_hist_trigger_named_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static int event_hist_trigger_named_init(struct event_trigger_data *data)
{
data->ref++;
save_named_trigger(data->named_data->name, data);
- event_hist_trigger_init(ops, data->named_data);
+ event_hist_trigger_init(data->named_data);
return 0;
}
-static void event_hist_trigger_named_free(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+static void event_hist_trigger_named_free(struct event_trigger_data *data)
{
if (WARN_ON_ONCE(data->ref <= 0))
return;
- event_hist_trigger_free(ops, data->named_data);
+ event_hist_trigger_free(data->named_data);
data->ref--;
if (!data->ref) {
@@ -5933,6 +5932,48 @@ static bool hist_trigger_match(struct event_trigger_data *data,
return true;
}
+static bool existing_hist_update_only(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
+{
+ struct hist_trigger_data *hist_data = data->private_data;
+ struct event_trigger_data *test, *named_data = NULL;
+ bool updated = false;
+
+ if (!hist_data->attrs->pause && !hist_data->attrs->cont &&
+ !hist_data->attrs->clear)
+ goto out;
+
+ if (hist_data->attrs->name) {
+ named_data = find_named_trigger(hist_data->attrs->name);
+ if (named_data) {
+ if (!hist_trigger_match(data, named_data, named_data,
+ true))
+ goto out;
+ }
+ }
+
+ if (hist_data->attrs->name && !named_data)
+ goto out;
+
+ list_for_each_entry(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ if (!hist_trigger_match(data, test, named_data, false))
+ continue;
+ if (hist_data->attrs->pause)
+ test->paused = true;
+ else if (hist_data->attrs->cont)
+ test->paused = false;
+ else if (hist_data->attrs->clear)
+ hist_clear(test);
+ updated = true;
+ goto out;
+ }
+ }
+ out:
+ return updated;
+}
+
static int hist_register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
@@ -5961,19 +6002,11 @@ static int hist_register_trigger(char *glob,
list_for_each_entry(test, &file->triggers, list) {
if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
- if (!hist_trigger_match(data, test, named_data, false))
- continue;
- if (hist_data->attrs->pause)
- test->paused = true;
- else if (hist_data->attrs->cont)
- test->paused = false;
- else if (hist_data->attrs->clear)
- hist_clear(test);
- else {
+ if (hist_trigger_match(data, test, named_data, false)) {
hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0);
ret = -EEXIST;
+ goto out;
}
- goto out;
}
}
new:
@@ -5993,7 +6026,7 @@ static int hist_register_trigger(char *glob,
}
if (data->ops->init) {
- ret = data->ops->init(data->ops, data);
+ ret = data->ops->init(data);
if (ret < 0)
goto out;
}
@@ -6012,8 +6045,6 @@ static int hist_register_trigger(char *glob,
if (named_data)
destroy_hist_data(hist_data);
-
- ret++;
out:
return ret;
}
@@ -6089,20 +6120,19 @@ static void hist_unregister_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
+ struct event_trigger_data *test = NULL, *iter, *named_data = NULL;
struct hist_trigger_data *hist_data = data->private_data;
- struct event_trigger_data *test, *named_data = NULL;
- bool unregistered = false;
lockdep_assert_held(&event_mutex);
if (hist_data->attrs->name)
named_data = find_named_trigger(hist_data->attrs->name);
- list_for_each_entry(test, &file->triggers, list) {
- if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
- if (!hist_trigger_match(data, test, named_data, false))
+ list_for_each_entry(iter, &file->triggers, list) {
+ if (iter->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ if (!hist_trigger_match(data, iter, named_data, false))
continue;
- unregistered = true;
+ test = iter;
list_del_rcu(&test->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
@@ -6110,11 +6140,11 @@ static void hist_unregister_trigger(char *glob,
}
}
- if (unregistered && test->ops->free)
- test->ops->free(test->ops, test);
+ if (test && test->ops->free)
+ test->ops->free(test);
if (hist_data->enable_timestamps) {
- if (!hist_data->remove || unregistered)
+ if (!hist_data->remove || test)
tracing_set_filter_buffering(file->tr, false);
}
}
@@ -6164,57 +6194,57 @@ static void hist_unreg_all(struct trace_event_file *file)
if (hist_data->enable_timestamps)
tracing_set_filter_buffering(file->tr, false);
if (test->ops->free)
- test->ops->free(test->ops, test);
+ test->ops->free(test);
}
}
}
static int event_hist_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+ char *glob, char *cmd,
+ char *param_and_filter)
{
unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT;
struct event_trigger_data *trigger_data;
struct hist_trigger_attrs *attrs;
- struct event_trigger_ops *trigger_ops;
struct hist_trigger_data *hist_data;
+ char *param, *filter, *p, *start;
struct synth_event *se;
const char *se_name;
- bool remove = false;
- char *trigger, *p, *start;
+ bool remove;
int ret = 0;
lockdep_assert_held(&event_mutex);
- WARN_ON(!glob);
+ if (WARN_ON(!glob))
+ return -EINVAL;
- if (strlen(glob)) {
+ if (glob[0]) {
hist_err_clear();
- last_cmd_set(file, param);
+ last_cmd_set(file, param_and_filter);
}
- if (!param)
- return -EINVAL;
+ remove = event_trigger_check_remove(glob);
- if (glob[0] == '!')
- remove = true;
+ if (event_trigger_empty_param(param_and_filter))
+ return -EINVAL;
/*
* separate the trigger from the filter (k:v [if filter])
* allowing for whitespace in the trigger
*/
- p = trigger = param;
+ p = param = param_and_filter;
do {
p = strstr(p, "if");
if (!p)
break;
- if (p == param)
+ if (p == param_and_filter)
return -EINVAL;
if (*(p - 1) != ' ' && *(p - 1) != '\t') {
p++;
continue;
}
- if (p >= param + strlen(param) - (sizeof("if") - 1) - 1)
+ if (p >= param_and_filter + strlen(param_and_filter) - (sizeof("if") - 1) - 1)
return -EINVAL;
if (*(p + sizeof("if") - 1) != ' ' && *(p + sizeof("if") - 1) != '\t') {
p++;
@@ -6224,24 +6254,24 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
} while (1);
if (!p)
- param = NULL;
+ filter = NULL;
else {
*(p - 1) = '\0';
- param = strstrip(p);
- trigger = strstrip(trigger);
+ filter = strstrip(p);
+ param = strstrip(param);
}
/*
* To simplify arithmetic expression parsing, replace occurrences of
* '.sym-offset' modifier with '.symXoffset'
*/
- start = strstr(trigger, ".sym-offset");
+ start = strstr(param, ".sym-offset");
while (start) {
*(start + 4) = 'X';
start = strstr(start + 11, ".sym-offset");
}
- attrs = parse_hist_trigger_attrs(file->tr, trigger);
+ attrs = parse_hist_trigger_attrs(file->tr, param);
if (IS_ERR(attrs))
return PTR_ERR(attrs);
@@ -6254,29 +6284,15 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
return PTR_ERR(hist_data);
}
- trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
-
- trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ trigger_data = event_trigger_alloc(cmd_ops, cmd, param, hist_data);
if (!trigger_data) {
ret = -ENOMEM;
goto out_free;
}
- trigger_data->count = -1;
- trigger_data->ops = trigger_ops;
- trigger_data->cmd_ops = cmd_ops;
-
- INIT_LIST_HEAD(&trigger_data->list);
- RCU_INIT_POINTER(trigger_data->filter, NULL);
-
- trigger_data->private_data = hist_data;
-
- /* if param is non-empty, it's supposed to be a filter */
- if (param && cmd_ops->set_filter) {
- ret = cmd_ops->set_filter(param, trigger_data, file);
- if (ret < 0)
- goto out_free;
- }
+ ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
+ if (ret < 0)
+ goto out_free;
if (remove) {
if (!have_hist_trigger_match(trigger_data, file))
@@ -6287,7 +6303,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
goto out_free;
}
- cmd_ops->unreg(glob+1, trigger_data, file);
+ event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
@@ -6296,17 +6312,11 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
goto out_free;
}
- ret = cmd_ops->reg(glob, trigger_data, file);
- /*
- * The above returns on success the # of triggers registered,
- * but if it didn't register any it returns zero. Consider no
- * triggers registered a failure too.
- */
- if (!ret) {
- if (!(attrs->pause || attrs->cont || attrs->clear))
- ret = -ENOENT;
+ if (existing_hist_update_only(glob, trigger_data, file))
goto out_free;
- } else if (ret < 0)
+
+ ret = event_trigger_register(cmd_ops, file, glob, trigger_data);
+ if (ret < 0)
goto out_free;
if (get_named_trigger_data(trigger_data))
@@ -6331,18 +6341,15 @@ enable:
se = find_synth_event(se_name);
if (se)
se->ref++;
- /* Just return zero, not the number of registered triggers */
- ret = 0;
out:
if (ret == 0)
hist_err_clear();
return ret;
out_unreg:
- cmd_ops->unreg(glob+1, trigger_data, file);
+ event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
out_free:
- if (cmd_ops->set_filter)
- cmd_ops->set_filter(NULL, trigger_data, NULL);
+ event_trigger_reset_filter(cmd_ops, trigger_data);
remove_hist_vars(hist_data);
@@ -6463,7 +6470,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file)
update_cond_flag(file);
trace_event_trigger_enable_disable(file, 0);
if (test->ops->free)
- test->ops->free(test->ops, test);
+ test->ops->free(test);
}
}
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 7eb9d04f1c2e..cb866c3141af 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -188,7 +188,7 @@ static int trigger_show(struct seq_file *m, void *v)
}
data = list_entry(v, struct event_trigger_data, list);
- data->ops->print(m, data->ops, data);
+ data->ops->print(m, data);
return 0;
}
@@ -432,7 +432,6 @@ event_trigger_print(const char *name, struct seq_file *m,
/**
* event_trigger_init - Generic event_trigger_ops @init implementation
- * @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data
*
* Common implementation of event trigger initialization.
@@ -442,8 +441,7 @@ event_trigger_print(const char *name, struct seq_file *m,
*
* Return: 0 on success, errno otherwise
*/
-int event_trigger_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+int event_trigger_init(struct event_trigger_data *data)
{
data->ref++;
return 0;
@@ -451,7 +449,6 @@ int event_trigger_init(struct event_trigger_ops *ops,
/**
* event_trigger_free - Generic event_trigger_ops @free implementation
- * @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data
*
* Common implementation of event trigger de-initialization.
@@ -460,8 +457,7 @@ int event_trigger_init(struct event_trigger_ops *ops,
* implementations.
*/
static void
-event_trigger_free(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+event_trigger_free(struct event_trigger_data *data)
{
if (WARN_ON_ONCE(data->ref <= 0))
return;
@@ -515,7 +511,7 @@ clear_event_triggers(struct trace_array *tr)
trace_event_trigger_enable_disable(file, 0);
list_del_rcu(&data->list);
if (data->ops->free)
- data->ops->free(data->ops, data);
+ data->ops->free(data);
}
}
}
@@ -581,19 +577,18 @@ static int register_trigger(char *glob,
}
if (data->ops->init) {
- ret = data->ops->init(data->ops, data);
+ ret = data->ops->init(data);
if (ret < 0)
goto out;
}
list_add_rcu(&data->list, &file->triggers);
- ret++;
update_cond_flag(file);
- if (trace_event_trigger_enable_disable(file, 1) < 0) {
+ ret = trace_event_trigger_enable_disable(file, 1);
+ if (ret < 0) {
list_del_rcu(&data->list);
update_cond_flag(file);
- ret--;
}
out:
return ret;
@@ -614,14 +609,13 @@ static void unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file)
{
- struct event_trigger_data *data;
- bool unregistered = false;
+ struct event_trigger_data *data = NULL, *iter;
lockdep_assert_held(&event_mutex);
- list_for_each_entry(data, &file->triggers, list) {
- if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) {
- unregistered = true;
+ list_for_each_entry(iter, &file->triggers, list) {
+ if (iter->cmd_ops->trigger_type == test->cmd_ops->trigger_type) {
+ data = iter;
list_del_rcu(&data->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
@@ -629,8 +623,8 @@ static void unregister_trigger(char *glob,
}
}
- if (unregistered && data->ops->free)
- data->ops->free(data->ops, data);
+ if (data && data->ops->free)
+ data->ops->free(data);
}
/*
@@ -744,15 +738,15 @@ bool event_trigger_empty_param(const char *param)
/**
* event_trigger_separate_filter - separate an event trigger from a filter
- * @param: The param string containing trigger and possibly filter
- * @trigger: outparam, will be filled with a pointer to the trigger
+ * @param_and_filter: String containing trigger and possibly filter
+ * @param: outparam, will be filled with a pointer to the trigger
* @filter: outparam, will be filled with a pointer to the filter
* @param_required: Specifies whether or not the param string is required
*
* Given a param string of the form '[trigger] [if filter]', this
* function separates the filter from the trigger and returns the
- * trigger in *trigger and the filter in *filter. Either the *trigger
- * or the *filter may be set to NULL by this function - if not set to
+ * trigger in @param and the filter in @filter. Either the @param
+ * or the @filter may be set to NULL by this function - if not set to
* NULL, they will contain strings corresponding to the trigger and
* filter.
*
@@ -927,48 +921,37 @@ void event_trigger_reset_filter(struct event_command *cmd_ops,
* @cmd_ops: The event_command operations for the trigger
* @file: The event file for the trigger's event
* @glob: The trigger command string, with optional remove(!) operator
- * @cmd: The cmd string
- * @param: The param string
* @trigger_data: The trigger_data for the trigger
- * @n_registered: optional outparam, the number of triggers registered
*
* Register an event trigger. The @cmd_ops are used to call the
- * cmd_ops->reg() function which actually does the registration. The
- * cmd_ops->reg() function returns the number of triggers registered,
- * which is assigned to n_registered, if n_registered is non-NULL.
+ * cmd_ops->reg() function which actually does the registration.
*
* Return: 0 on success, errno otherwise
*/
int event_trigger_register(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob,
- char *cmd,
- char *param,
- struct event_trigger_data *trigger_data,
- int *n_registered)
+ struct event_trigger_data *trigger_data)
{
- int ret;
-
- if (n_registered)
- *n_registered = 0;
-
- ret = cmd_ops->reg(glob, trigger_data, file);
- /*
- * The above returns on success the # of functions enabled,
- * but if it didn't find any functions it returns zero.
- * Consider no functions a failure too.
- */
- if (!ret) {
- cmd_ops->unreg(glob, trigger_data, file);
- ret = -ENOENT;
- } else if (ret > 0) {
- if (n_registered)
- *n_registered = ret;
- /* Just return zero, not the number of enabled functions */
- ret = 0;
- }
+ return cmd_ops->reg(glob, trigger_data, file);
+}
- return ret;
+/**
+ * event_trigger_unregister - unregister an event trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @glob: The trigger command string, with optional remove(!) operator
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Unregister an event trigger. The @cmd_ops are used to call the
+ * cmd_ops->unreg() function which actually does the unregistration.
+ */
+void event_trigger_unregister(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ struct event_trigger_data *trigger_data)
+{
+ cmd_ops->unreg(glob, trigger_data, file);
}
/*
@@ -981,7 +964,7 @@ int event_trigger_register(struct event_command *cmd_ops,
* @file: The trace_event_file associated with the event
* @glob: The raw string used to register the trigger
* @cmd: The cmd portion of the string used to register the trigger
- * @param: The params portion of the string used to register the trigger
+ * @param_and_filter: The param and filter portion of the string used to register the trigger
*
* Common implementation for event command parsing and trigger
* instantiation.
@@ -994,94 +977,53 @@ int event_trigger_register(struct event_command *cmd_ops,
static int
event_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+ char *glob, char *cmd, char *param_and_filter)
{
struct event_trigger_data *trigger_data;
- struct event_trigger_ops *trigger_ops;
- char *trigger = NULL;
- char *number;
+ char *param, *filter;
+ bool remove;
int ret;
- /* separate the trigger from the filter (t:n [if filter]) */
- if (param && isdigit(param[0])) {
- trigger = strsep(&param, " \t");
- if (param) {
- param = skip_spaces(param);
- if (!*param)
- param = NULL;
- }
- }
+ remove = event_trigger_check_remove(glob);
- trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
+ ret = event_trigger_separate_filter(param_and_filter, &param, &filter, false);
+ if (ret)
+ return ret;
ret = -ENOMEM;
- trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file);
if (!trigger_data)
goto out;
- trigger_data->count = -1;
- trigger_data->ops = trigger_ops;
- trigger_data->cmd_ops = cmd_ops;
- trigger_data->private_data = file;
- INIT_LIST_HEAD(&trigger_data->list);
- INIT_LIST_HEAD(&trigger_data->named_list);
-
- if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_data, file);
+ if (remove) {
+ event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
kfree(trigger_data);
ret = 0;
goto out;
}
- if (trigger) {
- number = strsep(&trigger, ":");
-
- ret = -EINVAL;
- if (!strlen(number))
- goto out_free;
-
- /*
- * We use the callback data field (which is a pointer)
- * as our counter.
- */
- ret = kstrtoul(number, 0, &trigger_data->count);
- if (ret)
- goto out_free;
- }
-
- if (!param) /* if param is non-empty, it's supposed to be a filter */
- goto out_reg;
-
- if (!cmd_ops->set_filter)
- goto out_reg;
+ ret = event_trigger_parse_num(param, trigger_data);
+ if (ret)
+ goto out_free;
- ret = cmd_ops->set_filter(param, trigger_data, file);
+ ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
if (ret < 0)
goto out_free;
- out_reg:
/* Up the trigger_data count to make sure reg doesn't free it on failure */
- event_trigger_init(trigger_ops, trigger_data);
- ret = cmd_ops->reg(glob, trigger_data, file);
- /*
- * The above returns on success the # of functions enabled,
- * but if it didn't find any functions it returns zero.
- * Consider no functions a failure too.
- */
- if (!ret) {
- cmd_ops->unreg(glob, trigger_data, file);
- ret = -ENOENT;
- } else if (ret > 0)
- ret = 0;
+ event_trigger_init(trigger_data);
+
+ ret = event_trigger_register(cmd_ops, file, glob, trigger_data);
+ if (ret)
+ goto out_free;
/* Down the counter of trigger_data or free it if not used anymore */
- event_trigger_free(trigger_ops, trigger_data);
+ event_trigger_free(trigger_data);
out:
return ret;
out_free:
- if (cmd_ops->set_filter)
- cmd_ops->set_filter(NULL, trigger_data, NULL);
+ event_trigger_reset_filter(cmd_ops, trigger_data);
kfree(trigger_data);
goto out;
}
@@ -1401,16 +1343,14 @@ traceoff_count_trigger(struct event_trigger_data *data,
}
static int
-traceon_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+traceon_trigger_print(struct seq_file *m, struct event_trigger_data *data)
{
return event_trigger_print("traceon", m, (void *)data->count,
data->filter_str);
}
static int
-traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data)
{
return event_trigger_print("traceoff", m, (void *)data->count,
data->filter_str);
@@ -1521,8 +1461,7 @@ register_snapshot_trigger(char *glob,
}
static int
-snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data)
{
return event_trigger_print("snapshot", m, (void *)data->count,
data->filter_str);
@@ -1617,8 +1556,7 @@ stacktrace_count_trigger(struct event_trigger_data *data,
}
static int
-stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
{
return event_trigger_print("stacktrace", m, (void *)data->count,
data->filter_str);
@@ -1708,7 +1646,6 @@ event_enable_count_trigger(struct event_trigger_data *data,
}
int event_enable_trigger_print(struct seq_file *m,
- struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1733,8 +1670,7 @@ int event_enable_trigger_print(struct seq_file *m,
return 0;
}
-void event_enable_trigger_free(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+void event_enable_trigger_free(struct event_trigger_data *data)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1781,39 +1717,33 @@ static struct event_trigger_ops event_disable_count_trigger_ops = {
int event_enable_trigger_parse(struct event_command *cmd_ops,
struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+ char *glob, char *cmd, char *param_and_filter)
{
struct trace_event_file *event_enable_file;
struct enable_trigger_data *enable_data;
struct event_trigger_data *trigger_data;
- struct event_trigger_ops *trigger_ops;
struct trace_array *tr = file->tr;
+ char *param, *filter;
+ bool enable, remove;
const char *system;
const char *event;
bool hist = false;
- char *trigger;
- char *number;
- bool enable;
int ret;
- if (!param)
- return -EINVAL;
+ remove = event_trigger_check_remove(glob);
- /* separate the trigger from the filter (s:e:n [if filter]) */
- trigger = strsep(&param, " \t");
- if (!trigger)
+ if (event_trigger_empty_param(param_and_filter))
return -EINVAL;
- if (param) {
- param = skip_spaces(param);
- if (!*param)
- param = NULL;
- }
- system = strsep(&trigger, ":");
- if (!trigger)
+ ret = event_trigger_separate_filter(param_and_filter, &param, &filter, true);
+ if (ret)
+ return ret;
+
+ system = strsep(&param, ":");
+ if (!param)
return -EINVAL;
- event = strsep(&trigger, ":");
+ event = strsep(&param, ":");
ret = -EINVAL;
event_enable_file = find_event_file(tr, system, event);
@@ -1829,32 +1759,24 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
#else
enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
#endif
- trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
-
ret = -ENOMEM;
- trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
- if (!trigger_data)
- goto out;
enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL);
- if (!enable_data) {
- kfree(trigger_data);
+ if (!enable_data)
goto out;
- }
-
- trigger_data->count = -1;
- trigger_data->ops = trigger_ops;
- trigger_data->cmd_ops = cmd_ops;
- INIT_LIST_HEAD(&trigger_data->list);
- RCU_INIT_POINTER(trigger_data->filter, NULL);
enable_data->hist = hist;
enable_data->enable = enable;
enable_data->file = event_enable_file;
- trigger_data->private_data = enable_data;
- if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_data, file);
+ trigger_data = event_trigger_alloc(cmd_ops, cmd, param, enable_data);
+ if (!trigger_data) {
+ kfree(enable_data);
+ goto out;
+ }
+
+ if (remove) {
+ event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
kfree(trigger_data);
kfree(enable_data);
ret = 0;
@@ -1862,35 +1784,16 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
}
/* Up the trigger_data count to make sure nothing frees it on failure */
- event_trigger_init(trigger_ops, trigger_data);
-
- if (trigger) {
- number = strsep(&trigger, ":");
-
- ret = -EINVAL;
- if (!strlen(number))
- goto out_free;
-
- /*
- * We use the callback data field (which is a pointer)
- * as our counter.
- */
- ret = kstrtoul(number, 0, &trigger_data->count);
- if (ret)
- goto out_free;
- }
+ event_trigger_init(trigger_data);
- if (!param) /* if param is non-empty, it's supposed to be a filter */
- goto out_reg;
-
- if (!cmd_ops->set_filter)
- goto out_reg;
+ ret = event_trigger_parse_num(param, trigger_data);
+ if (ret)
+ goto out_free;
- ret = cmd_ops->set_filter(param, trigger_data, file);
+ ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
if (ret < 0)
goto out_free;
- out_reg:
/* Don't let event modules unload while probe registered */
ret = trace_event_try_get_ref(event_enable_file->event_call);
if (!ret) {
@@ -1901,32 +1804,23 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
ret = trace_event_enable_disable(event_enable_file, 1, 1);
if (ret < 0)
goto out_put;
- ret = cmd_ops->reg(glob, trigger_data, file);
- /*
- * The above returns on success the # of functions enabled,
- * but if it didn't find any functions it returns zero.
- * Consider no functions a failure too.
- */
- if (!ret) {
- ret = -ENOENT;
- goto out_disable;
- } else if (ret < 0)
+
+ ret = event_trigger_register(cmd_ops, file, glob, trigger_data);
+ if (ret)
goto out_disable;
- /* Just return zero, not the number of enabled functions */
- ret = 0;
- event_trigger_free(trigger_ops, trigger_data);
+
+ event_trigger_free(trigger_data);
out:
return ret;
-
out_disable:
trace_event_enable_disable(event_enable_file, 0, 1);
out_put:
trace_event_put_ref(event_enable_file->event_call);
out_free:
- if (cmd_ops->set_filter)
- cmd_ops->set_filter(NULL, trigger_data, NULL);
- event_trigger_free(trigger_ops, trigger_data);
+ event_trigger_reset_filter(cmd_ops, trigger_data);
+ event_trigger_free(trigger_data);
kfree(enable_data);
+
goto out;
}
@@ -1953,19 +1847,18 @@ int event_enable_register_trigger(char *glob,
}
if (data->ops->init) {
- ret = data->ops->init(data->ops, data);
+ ret = data->ops->init(data);
if (ret < 0)
goto out;
}
list_add_rcu(&data->list, &file->triggers);
- ret++;
update_cond_flag(file);
- if (trace_event_trigger_enable_disable(file, 1) < 0) {
+ ret = trace_event_trigger_enable_disable(file, 1);
+ if (ret < 0) {
list_del_rcu(&data->list);
update_cond_flag(file);
- ret--;
}
out:
return ret;
@@ -1976,19 +1869,18 @@ void event_enable_unregister_trigger(char *glob,
struct trace_event_file *file)
{
struct enable_trigger_data *test_enable_data = test->private_data;
+ struct event_trigger_data *data = NULL, *iter;
struct enable_trigger_data *enable_data;
- struct event_trigger_data *data;
- bool unregistered = false;
lockdep_assert_held(&event_mutex);
- list_for_each_entry(data, &file->triggers, list) {
- enable_data = data->private_data;
+ list_for_each_entry(iter, &file->triggers, list) {
+ enable_data = iter->private_data;
if (enable_data &&
- (data->cmd_ops->trigger_type ==
+ (iter->cmd_ops->trigger_type ==
test->cmd_ops->trigger_type) &&
(enable_data->file == test_enable_data->file)) {
- unregistered = true;
+ data = iter;
list_del_rcu(&data->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
@@ -1996,8 +1888,8 @@ void event_enable_unregister_trigger(char *glob,
}
}
- if (unregistered && data->ops->free)
- data->ops->free(data->ops, data);
+ if (data && data->ops->free)
+ data->ops->free(data);
}
static struct event_trigger_ops *
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 47cebef78532..93507330462c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1907,25 +1907,18 @@ core_initcall(init_kprobe_trace_early);
static __init int init_kprobe_trace(void)
{
int ret;
- struct dentry *entry;
ret = tracing_init_dentry();
if (ret)
return 0;
- entry = tracefs_create_file("kprobe_events", TRACE_MODE_WRITE,
- NULL, NULL, &kprobe_events_ops);
-
/* Event list interface */
- if (!entry)
- pr_warn("Could not create tracefs 'kprobe_events' entry\n");
+ trace_create_file("kprobe_events", TRACE_MODE_WRITE,
+ NULL, NULL, &kprobe_events_ops);
/* Profile interface */
- entry = tracefs_create_file("kprobe_profile", TRACE_MODE_READ,
- NULL, NULL, &kprobe_profile_ops);
-
- if (!entry)
- pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+ trace_create_file("kprobe_profile", TRACE_MODE_READ,
+ NULL, NULL, &kprobe_profile_ops);
setup_boot_kprobe_events();
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index afb92e2f0aea..313439920a8c 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1578,11 +1578,27 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
trace_timerlat_sample(&s);
- notify_new_max_latency(diff);
+ if (osnoise_data.stop_tracing) {
+ if (time_to_us(diff) >= osnoise_data.stop_tracing) {
+
+ /*
+ * At this point, if stop_tracing is set and <= print_stack,
+ * print_stack is set and would be printed in the thread handler.
+ *
+ * Thus, print the stack trace as it is helpful to define the
+ * root cause of an IRQ latency.
+ */
+ if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
+ timerlat_save_stack(0);
+ timerlat_dump_stack(time_to_us(diff));
+ }
- if (osnoise_data.stop_tracing)
- if (time_to_us(diff) >= osnoise_data.stop_tracing)
osnoise_stop_tracing();
+ notify_new_max_latency(diff);
+
+ return HRTIMER_NORESTART;
+ }
+ }
wake_up_process(tlat->kthread);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8aa493d25c73..67f47ea27921 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -692,7 +692,7 @@ static LIST_HEAD(ftrace_event_list);
static int trace_search_list(struct list_head **list)
{
- struct trace_event *e;
+ struct trace_event *e = NULL, *iter;
int next = __TRACE_LAST_TYPE;
if (list_empty(&ftrace_event_list)) {
@@ -704,9 +704,11 @@ static int trace_search_list(struct list_head **list)
* We used up all possible max events,
* let's see if somebody freed one.
*/
- list_for_each_entry(e, &ftrace_event_list, list) {
- if (e->type != next)
+ list_for_each_entry(iter, &ftrace_event_list, list) {
+ if (iter->type != next) {
+ e = iter;
break;
+ }
next++;
}
@@ -714,7 +716,10 @@ static int trace_search_list(struct list_head **list)
if (next > TRACE_EVENT_TYPE_MAX)
return 0;
- *list = &e->list;
+ if (e)
+ *list = &e->list;
+ else
+ *list = &ftrace_event_list;
return next;
}
@@ -778,9 +783,8 @@ int register_trace_event(struct trace_event *event)
list_add_tail(&event->list, list);
- } else if (event->type > __TRACE_LAST_TYPE) {
- printk(KERN_WARNING "Need to add type to trace.h\n");
- WARN_ON(1);
+ } else if (WARN(event->type > __TRACE_LAST_TYPE,
+ "Need to add type to trace.h")) {
goto out;
} else {
/* Is this event already used */
@@ -1571,13 +1575,8 @@ __init static int init_events(void)
for (i = 0; events[i]; i++) {
event = events[i];
-
ret = register_trace_event(event);
- if (!ret) {
- printk(KERN_WARNING "event %d failed to register\n",
- event->type);
- WARN_ON_ONCE(1);
- }
+ WARN_ONCE(!ret, "event %d failed to register", event->type);
}
return 0;
diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
index 4d4b78c8ca25..a520b11afb0d 100644
--- a/kernel/trace/trace_recursion_record.c
+++ b/kernel/trace/trace_recursion_record.c
@@ -224,12 +224,9 @@ static const struct file_operations recursed_functions_fops = {
__init static int create_recursed_functions(void)
{
- struct dentry *dentry;
- dentry = trace_create_file("recursed_functions", TRACE_MODE_WRITE,
- NULL, NULL, &recursed_functions_fops);
- if (!dentry)
- pr_warn("WARNING: Failed to create recursed_functions\n");
+ trace_create_file("recursed_functions", TRACE_MODE_WRITE,
+ NULL, NULL, &recursed_functions_fops);
return 0;
}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index abcadbe933bb..a2d301f58ced 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -895,6 +895,9 @@ trace_selftest_startup_function_graph(struct tracer *trace,
ret = -1;
goto out;
}
+
+ /* Enable tracing on all functions again */
+ ftrace_set_global_filter(NULL, 0, 1);
#endif
/* Don't test dynamic tracing, the function tracer already did */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f755bde42fd0..b69e207012c9 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -154,7 +154,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
goto end;
/* parameter types */
- if (tr->trace_flags & TRACE_ITER_VERBOSE)
+ if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
trace_seq_printf(s, "%s ", entry->types[i]);
/* parameter values */
@@ -296,9 +296,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct trace_event_file *trace_file;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
- struct ring_buffer_event *event;
- struct trace_buffer *buffer;
- unsigned int trace_ctx;
+ struct trace_event_buffer fbuffer;
unsigned long args[6];
int syscall_nr;
int size;
@@ -321,20 +319,16 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
- trace_ctx = tracing_gen_ctx();
-
- event = trace_event_buffer_lock_reserve(&buffer, trace_file,
- sys_data->enter_event->event.type, size, trace_ctx);
- if (!event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
+ if (!entry)
return;
- entry = ring_buffer_event_data(event);
+ entry = ring_buffer_event_data(fbuffer.event);
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, args);
memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
- event_trigger_unlock_commit(trace_file, buffer, event, entry,
- trace_ctx);
+ trace_event_buffer_commit(&fbuffer);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
@@ -343,9 +337,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
struct trace_event_file *trace_file;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
- struct ring_buffer_event *event;
- struct trace_buffer *buffer;
- unsigned int trace_ctx;
+ struct trace_event_buffer fbuffer;
int syscall_nr;
syscall_nr = trace_get_syscall_nr(current, regs);
@@ -364,20 +356,15 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
if (!sys_data)
return;
- trace_ctx = tracing_gen_ctx();
-
- event = trace_event_buffer_lock_reserve(&buffer, trace_file,
- sys_data->exit_event->event.type, sizeof(*entry),
- trace_ctx);
- if (!event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
+ if (!entry)
return;
- entry = ring_buffer_event_data(event);
+ entry = ring_buffer_event_data(fbuffer.event);
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
- event_trigger_unlock_commit(trace_file, buffer, event, entry,
- trace_ctx);
+ trace_event_buffer_commit(&fbuffer);
}
static int reg_event_syscall_enter(struct trace_event_file *file,
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 9628b5571846..9901708ce6b8 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -1045,7 +1045,8 @@ static void sort_secondary(struct tracing_map *map,
/**
* tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map
* @map: The tracing_map
- * @sort_key: The sort key to use for sorting
+ * @sort_keys: The sort keys to use for sorting
+ * @n_sort_keys: The number of sort keys; always at least one (hitcount)
* @sort_entries: outval: pointer to allocated and sorted array of entries
*
* tracing_map_sort_entries() sorts the current set of entries in the
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 1d261fbe367b..4252f0645b9e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -23,15 +23,20 @@ void bacct_add_tsk(struct user_namespace *user_ns,
{
const struct cred *tcred;
u64 utime, stime, utimescaled, stimescaled;
- u64 delta;
+ u64 now_ns, delta;
time64_t btime;
BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
/* calculate task elapsed time in nsec */
- delta = ktime_get_ns() - tsk->start_time;
+ now_ns = ktime_get_ns();
+ /* store whole group time first */
+ delta = now_ns - tsk->group_leader->start_time;
/* Convert to micro seconds */
do_div(delta, NSEC_PER_USEC);
+ stats->ac_tgetime = delta;
+ delta = now_ns - tsk->start_time;
+ do_div(delta, NSEC_PER_USEC);
stats->ac_etime = delta;
/* Convert to seconds for btime (note y2106 limit) */
btime = ktime_get_real_seconds() - div_u64(delta, USEC_PER_SEC);
@@ -51,6 +56,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
stats->ac_nice = task_nice(tsk);
stats->ac_sched = tsk->policy;
stats->ac_pid = task_pid_nr_ns(tsk, pid_ns);
+ stats->ac_tgid = task_tgid_nr_ns(tsk, pid_ns);
rcu_read_lock();
tcred = __task_cred(tsk);
stats->ac_uid = from_kuid_munged(user_ns, tcred->uid);
diff --git a/kernel/umh.c b/kernel/umh.c
index 36c123360ab8..b989736e8707 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -132,7 +132,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
/* If SIGCLD is ignored do_wait won't populate the status. */
kernel_sigaction(SIGCHLD, SIG_DFL);
- pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
+ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
if (pid < 0)
sub_info->retval = pid;
else
@@ -171,8 +171,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
* want to pollute current->children, and we need a parent
* that always ignores SIGCHLD to ensure auto-reaping.
*/
- pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
- CLONE_PARENT | SIGCHLD);
+ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info,
+ CLONE_PARENT | SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
umh_complete(sub_info);
diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c
index 9dae1f648713..8303f4c7ca71 100644
--- a/kernel/usermode_driver.c
+++ b/kernel/usermode_driver.c
@@ -28,7 +28,7 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na
file = file_open_root_mnt(mnt, name, O_CREAT | O_WRONLY, 0700);
if (IS_ERR(file)) {
- mntput(mnt);
+ kern_unmount(mnt);
return ERR_CAST(file);
}
@@ -38,7 +38,7 @@ static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *na
if (err >= 0)
err = -ENOMEM;
filp_close(file, NULL);
- mntput(mnt);
+ kern_unmount(mnt);
return ERR_PTR(err);
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 40024e03d422..20a7a55e62b6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -57,7 +57,7 @@ int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+ IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
@@ -168,7 +168,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
- CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+ IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;