From b2c0710c464ede15e1fc52fb1e7ee9ba54cea186 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 9 Sep 2010 13:40:39 -0700 Subject: rcu: move TINY_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 71 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 13 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index d806735342a..86eef29cdfb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,8 +59,15 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_cbs_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); +static unsigned long have_rcu_cbs; +static void invoke_rcu_cbs(void); + /* Forward declarations for rcutiny_plugin.h. 
*/ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static int rcu_cbs(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -123,7 +130,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -132,7 +139,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -152,10 +159,10 @@ void rcu_check_callbacks(int cpu, int user) } /* - * Helper function for rcu_process_callbacks() that operates on the - * specified rcu_ctrlkblk structure. + * Invoke the RCU callbacks on the specified rcu_ctrlblk structure + * whose grace period has elapsed. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; @@ -180,19 +187,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); + local_bh_disable(); list->func(list); + local_bh_enable(); list = next; } } /* - * Invoke any callbacks whose grace period has completed. + * This kthread invokes RCU callbacks whose grace periods have + * elapsed. It is awakened as needed, and takes the place of the + * RCU_SOFTIRQ that was used previously for this purpose. + * This is a kthread, but it is never stopped, at least not until + * the system goes down.
+ */ +static int rcu_cbs(void *arg) +{ + unsigned long work; + unsigned long flags; + + for (;;) { + wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + local_irq_save(flags); + work = have_rcu_cbs; + have_rcu_cbs = 0; + local_irq_restore(flags); + if (work) { + rcu_process_callbacks(&rcu_sched_ctrlblk); + rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); + } + } + + return 0; /* Not reached, but needed to shut gcc up. */ +} + +/* + * Wake up rcu_cbs() to process callbacks now eligible for invocation. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static void invoke_rcu_cbs(void) { - __rcu_process_callbacks(&rcu_sched_ctrlblk); - __rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); + unsigned long flags; + + local_irq_save(flags); + have_rcu_cbs = 1; + wake_up(&rcu_cbs_wq); + local_irq_restore(flags); } /* @@ -282,7 +322,12 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __init rcu_init(void) +/* + * Spawn the kthread that invokes RCU callbacks. + */ +static int __init rcu_spawn_kthreads(void) { - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + return 0; } +early_initcall(rcu_spawn_kthreads); -- cgit v1.2.3 From 24278d148316d2180be6df40e06db013d8b232b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2010 17:25:23 -0700 Subject: rcu: priority boosting for TINY_PREEMPT_RCU Add priority boosting, but only for TINY_PREEMPT_RCU. This is enabled by the default-off RCU_BOOST kernel parameter. The priority to which to boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel parameter (defaulting to real-time priority 1) and the time to wait before boosting the readers blocking a given grace period is controlled by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 9 +- include/linux/sched.h | 11 ++- init/Kconfig | 39 +++++++++ kernel/rcutiny.c | 66 ++++++--------- kernel/rcutiny_plugin.h | 208 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 280 insertions(+), 53 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6b..69f91aacdee 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -81,6 +81,12 @@ extern struct group_info init_groups; */ # define CAP_INIT_BSET CAP_FULL_SET +#ifdef CONFIG_RCU_BOOST +#define INIT_TASK_RCU_BOOST() \ + .rcu_boost_mutex = NULL, +#else +#define INIT_TASK_RCU_BOOST() +#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, @@ -92,7 +98,8 @@ extern struct group_info init_groups; .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() + INIT_TASK_RCU_TREE_PREEMPT() \ + INIT_TASK_RCU_BOOST() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index e18473f0eb7..ed1a9bc52b2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,9 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + struct rt_mutex *rcu_boost_mutex; +#endif /* #ifdef CONFIG_RCU_BOOST */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1745,7 +1748,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. 
*/ +#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { @@ -1753,7 +1757,10 @@ static inline void rcu_copy_process(struct task_struct *p) p->rcu_read_unlock_special = 0; #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + p->rcu_boost_mutex = NULL; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index a619a1ac7f4..48efefcac12 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -450,6 +450,45 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. +config RCU_BOOST + bool "Enable RCU priority boosting" + depends on RT_MUTEXES && TINY_PREEMPT_RCU + default n + help + This option boosts the priority of preempted RCU readers that + block the current preemptible RCU grace period for too long. + This option also prevents heavy loads from blocking RCU + callback invocation for all flavors of RCU. + + Say Y here if you are working with real-time apps or heavy loads. + Say N here if you are unsure. + +config RCU_BOOST_PRIO + int "Real-time priority to boost RCU readers to" + range 1 99 + depends on RCU_BOOST + default 1 + help + This option specifies the real-time priority to which preempted + RCU readers are to be boosted. If you are working with CPU-bound + real-time applications, you should specify a priority higher than + the highest-priority CPU-bound application. + + Specify the real-time priority, or take the default if unsure. + +config RCU_BOOST_DELAY + int "Milliseconds to delay boosting after RCU grace-period start" + range 0 3000 + depends on RCU_BOOST + default 500 + help + This option specifies the time to wait after the beginning of + a given grace period before priority-boosting preempted RCU + readers blocking that grace period.
Note that any RCU reader + blocking an expedited RCU grace period is boosted immediately. + + Accept the default if unsure. + endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 86eef29cdfb..93d166582cb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -36,38 +36,16 @@ #include #include -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_cbs_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); -static unsigned long have_rcu_cbs; -static void invoke_rcu_cbs(void); +/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; +static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. 
*/ +struct rcu_ctrlblk; static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_cbs(void *arg); +static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) * This is a kthread, but it is never stopped, at least not until * the system goes down. */ -static int rcu_cbs(void *arg) +static int rcu_kthread(void *arg) { unsigned long work; + unsigned long morework; unsigned long flags; for (;;) { - wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); + morework = rcu_boost(); local_irq_save(flags); - work = have_rcu_cbs; - have_rcu_cbs = 0; + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; local_irq_restore(flags); if (work) { rcu_process_callbacks(&rcu_sched_ctrlblk); rcu_process_callbacks(&rcu_bh_ctrlblk); rcu_preempt_process_callbacks(); } + schedule_timeout_interruptible(1); /* Leave CPU for others. */ } return 0; /* Not reached, but needed to shut gcc up. */ } /* - * Wake up rcu_cbs() to process callbacks now eligible for invocation. + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. 
*/ -static void invoke_rcu_cbs(void) +static void invoke_rcu_kthread(void) { unsigned long flags; local_irq_save(flags); - have_rcu_cbs = 1; - wake_up(&rcu_cbs_wq); + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); local_irq_restore(flags); } @@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched); */ static int __init rcu_spawn_kthreads(void) { - rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); return 0; } early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 95f9239df51..24f43165f22 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -24,6 +24,29 @@ #include +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is not such task. */ + /* is no such task. 
*/ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ +#ifdef CONFIG_RCU_BOOST + struct list_head *boost_tasks; + /* Pointer to first task that needs to be */ + /* priority-boosted, or NULL if no priority */ + /* boosting is needed. If there is no */ + /* current or expedited grace period, there */ + /* can be no such task. */ +#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ + s8 boosted_this_gp; /* Has boosting already happened? */ + unsigned long boost_time; /* When to start boosting (jiffies) */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void) return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; } +/* + * Advance a ->blkd_tasks-list pointer to the next entry, instead + * returning NULL if at the end of the list. + */ +static struct list_head *rcu_next_node_entry(struct task_struct *t) +{ + struct list_head *np; + + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + return np; +} + +#ifdef CONFIG_RCU_BOOST + +#include "rtmutex_common.h" + +/* + * Carry out RCU priority boosting on the task indicated by ->boost_tasks, + * and advance ->boost_tasks to the next task in the ->blkd_tasks list. + */ +static int rcu_boost(void) +{ + unsigned long flags; + struct rt_mutex mtx; + struct list_head *np; + struct task_struct *t; + + if (rcu_preempt_ctrlblk.boost_tasks == NULL) + return 0; /* Nothing to boost. 
*/ + raw_local_irq_save(flags); + rcu_preempt_ctrlblk.boosted_this_gp++; + t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, + rcu_node_entry); + np = rcu_next_node_entry(t); + rt_mutex_init_proxy_locked(&mtx, t); + t->rcu_boost_mutex = &mtx; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; + raw_local_irq_restore(flags); + rt_mutex_lock(&mtx); + rt_mutex_unlock(&mtx); + return rcu_preempt_ctrlblk.boost_tasks != NULL; +} + +/* + * Check to see if it is now time to start boosting RCU readers blocking + * the current grace period, and, if so, tell the rcu_kthread_task to + * start boosting them. If there is an expedited boost in progress, + * we wait for it to complete. + */ +static void rcu_initiate_boost(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.boosted_this_gp == 0 && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { + rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + invoke_rcu_kthread(); + } +} + +/* + * Initiate boosting for an expedited grace period. + */ +static void rcu_initiate_expedited_boost(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + rcu_preempt_ctrlblk.boosted_this_gp = -1; + invoke_rcu_kthread(); + } + raw_local_irq_restore(flags); +} + +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); + +/* + * Do priority-boost accounting for the start of a new grace period. + */ +static void rcu_preempt_boost_start_gp(void) +{ + rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; + if (rcu_preempt_ctrlblk.boosted_this_gp > 0) + rcu_preempt_ctrlblk.boosted_this_gp = 0; +} + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * If there is no RCU priority boosting, we don't boost. 
+ */ +static int rcu_boost(void) +{ + return 0; +} + +/* + * If there is no RCU priority boosting, we don't initiate boosting. + */ +static void rcu_initiate_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, we don't initiate expedited boosting. + */ +static void rcu_initiate_expedited_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, nothing to do at grace-period start. + */ +static void rcu_preempt_boost_start_gp(void) +{ +} + +#endif /* else #ifdef CONFIG_RCU_BOOST */ + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - /* - * If there is no GP, or if blocked readers are still blocking GP, - * then there is nothing more to do. - */ + /* If there is no GP then there is nothing more to do. */ if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) return; + /* If there are blocked readers, go check up on boosting. */ + if (rcu_preempt_blocked_readers_cgp()) { + rcu_initiate_boost(); + return; + } /* Advance callbacks. */ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; @@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void) /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; + /* Set up for RCU priority boosting. */ + rcu_preempt_boost_start_gp(); + /* If there is no running reader, CPU is done with GP. 
*/ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; + np = rcu_next_node_entry(t); list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; +#ifdef CONFIG_RCU_BOOST + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) + rcu_preempt_ctrlblk.boost_tasks = np; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } +#ifdef CONFIG_RCU_BOOST + /* Unboost self if was boosted. */ + if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; + rt_mutex_unlock(t->rcu_boost_mutex); + t->rcu_boost_mutex = NULL; + } +#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_cbs(); + invoke_rcu_kthread(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) + rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -574,6 +747,15 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +/* + * Because preemptible RCU does not exist, it is never necessary to + * boost preempted RCU readers. 
+ */ +static int rcu_boost(void) +{ + return 0; +} + /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void) } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else /* #ifdef CONFIG_RCU_BOOST */ +#define RCU_BOOST_PRIO 1 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3 From 9e571a82f0cb205a65a0ea41657f19f22b7fabb8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Sep 2010 21:26:52 -0700 Subject: rcu: add tracing for TINY_RCU and TINY_PREEMPT_RCU Add tracing for the tiny RCU implementations, including statistics on boosting in the case of TINY_PREEMPT_RCU and RCU_BOOST. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- init/Kconfig | 1 - kernel/rcutiny.c | 4 + kernel/rcutiny_plugin.h | 232 +++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 226 insertions(+), 11 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/init/Kconfig b/init/Kconfig index 48efefcac12..929adf6cb6b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -384,7 +384,6 @@ config PREEMPT_RCU config RCU_TRACE bool "Enable tracing for RCU" - depends on TREE_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 93d166582cb..03449372474 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -144,6 +144,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; + RCU_TRACE(int cb_count = 0); /* If no RCU callbacks ready to invoke, just return. 
*/ if (&rcp->rcucblist == rcp->donetail) @@ -169,7 +170,9 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) list->func(list); local_bh_enable(); list = next; + RCU_TRACE(cb_count++); } + RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); } /* @@ -252,6 +255,7 @@ static void __call_rcu(struct rcu_head *head, local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; + RCU_TRACE(rcp->qlen++); local_irq_restore(flags); } diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 24f43165f22..f4e0df082d3 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -23,12 +23,21 @@ */ #include +#include +#include + +#ifdef CONFIG_RCU_TRACE +#define RCU_TRACE(stmt) stmt +#else /* #ifdef CONFIG_RCU_TRACE */ +#define RCU_TRACE(stmt) +#endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ struct rcu_head **curtail; /* ->next pointer of last CB. */ + RCU_TRACE(long qlen); /* Number of pending CBs. */ }; /* Definition for rcupdate control block. */ @@ -90,8 +99,26 @@ struct rcu_preempt_ctrlblk { u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ +#ifdef CONFIG_RCU_BOOST s8 boosted_this_gp; /* Has boosting already happened? 
*/ unsigned long boost_time; /* When to start boosting (jiffies) */ +#endif /* #ifdef CONFIG_RCU_BOOST */ +#ifdef CONFIG_RCU_TRACE + unsigned long n_grace_periods; +#ifdef CONFIG_RCU_BOOST + unsigned long n_tasks_boosted; + unsigned long n_exp_boosts; + unsigned long n_normal_boosts; + unsigned long n_normal_balk_blkd_tasks; + unsigned long n_normal_balk_gp_tasks; + unsigned long n_normal_balk_boost_tasks; + unsigned long n_normal_balk_boosted; + unsigned long n_normal_balk_notyet; + unsigned long n_normal_balk_nos; + unsigned long n_exp_balk_blkd_tasks; + unsigned long n_exp_balk_nos; +#endif /* #ifdef CONFIG_RCU_BOOST */ +#endif /* #ifdef CONFIG_RCU_TRACE */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -170,6 +197,65 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t) return np; } +#ifdef CONFIG_RCU_TRACE + +#ifdef CONFIG_RCU_BOOST +static void rcu_initiate_boost_trace(void); +static void rcu_initiate_exp_boost_trace(void); +#endif /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Dump additional statistics for TINY_PREEMPT_RCU.
+ */ +static void show_tiny_preempt_stats(struct seq_file *m) +{ + seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n", + rcu_preempt_ctrlblk.rcb.qlen, + rcu_preempt_ctrlblk.n_grace_periods, + rcu_preempt_ctrlblk.gpnum, + rcu_preempt_ctrlblk.gpcpu, + rcu_preempt_ctrlblk.completed, + "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)], + "N."[!rcu_preempt_ctrlblk.gp_tasks], + "E."[!rcu_preempt_ctrlblk.exp_tasks]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, " ttb=%c btg=", + "B."[!rcu_preempt_ctrlblk.boost_tasks]); + switch (rcu_preempt_ctrlblk.boosted_this_gp) { + case -1: + seq_puts(m, "exp"); + break; + case 0: + seq_puts(m, "no"); + break; + case 1: + seq_puts(m, "done"); + break; + default: + seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); + } + seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", + rcu_preempt_ctrlblk.n_tasks_boosted, + rcu_preempt_ctrlblk.n_exp_boosts, + rcu_preempt_ctrlblk.n_normal_boosts, + (int)(jiffies & 0xffff), + (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); + seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", + "normal balk", + rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, + rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, + rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, + rcu_preempt_ctrlblk.n_normal_balk_boosted, + rcu_preempt_ctrlblk.n_normal_balk_notyet, + rcu_preempt_ctrlblk.n_normal_balk_nos); + seq_printf(m, " exp balk: bt=%lu nos=%lu\n", + rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, + rcu_preempt_ctrlblk.n_exp_balk_nos); +#endif /* #ifdef CONFIG_RCU_BOOST */ +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + #ifdef CONFIG_RCU_BOOST #include "rtmutex_common.h" @@ -197,6 +283,7 @@ static int rcu_boost(void) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; raw_local_irq_restore(flags); rt_mutex_lock(&mtx); + RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); rt_mutex_unlock(&mtx); return rcu_preempt_ctrlblk.boost_tasks != NULL; } @@ -206,16 +293,27 @@ static int rcu_boost(void) * the 
current grace period, and, if so, tell the rcu_kthread_task to * start boosting them. If there is an expedited boost in progress, * we wait for it to complete. + * + * If there are no blocked readers blocking the current grace period, + * return 0 to let the caller know, otherwise return 1. Note that this + * return value is independent of whether or not boosting was done. */ -static void rcu_initiate_boost(void) +static int rcu_initiate_boost(void) { + if (!rcu_preempt_blocked_readers_cgp()) { + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); + return 0; + } if (rcu_preempt_ctrlblk.gp_tasks != NULL && rcu_preempt_ctrlblk.boost_tasks == NULL && rcu_preempt_ctrlblk.boosted_this_gp == 0 && ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_kthread(); - } + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); + } else + RCU_TRACE(rcu_initiate_boost_trace()); + return 1; } /* @@ -231,7 +329,9 @@ static void rcu_initiate_expedited_boost(void) rcu_preempt_ctrlblk.blkd_tasks.next; rcu_preempt_ctrlblk.boosted_this_gp = -1; invoke_rcu_kthread(); - } + RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); + } else + RCU_TRACE(rcu_initiate_exp_boost_trace()); raw_local_irq_restore(flags); } @@ -258,10 +358,13 @@ static int rcu_boost(void) } /* - * If there is no RCU priority boosting, we don't initiate boosting. + * If there is no RCU priority boosting, we don't initiate boosting, + * but we do indicate whether there are blocked readers blocking the + * current grace period. */ -static void rcu_initiate_boost(void) +static int rcu_initiate_boost(void) { + return rcu_preempt_blocked_readers_cgp(); } /* @@ -308,13 +411,14 @@ static void rcu_preempt_cpu_qs(void) current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; /* If there is no GP then there is nothing more to do. 
*/ - if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + if (!rcu_preempt_gp_in_progress()) return; - /* If there are blocked readers, go check up on boosting. */ - if (rcu_preempt_blocked_readers_cgp()) { - rcu_initiate_boost(); + /* + * Check up on boosting. If there are readers blocking the + * current grace period, leave. + */ + if (rcu_initiate_boost()) return; - } /* Advance callbacks. */ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; @@ -339,6 +443,7 @@ static void rcu_preempt_start_gp(void) /* Official start of GP. */ rcu_preempt_ctrlblk.gpnum++; + RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); /* Any blocked RCU readers block new GP. */ if (rcu_preempt_blocked_readers_any()) @@ -591,6 +696,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) local_irq_save(flags); *rcu_preempt_ctrlblk.nexttail = head; rcu_preempt_ctrlblk.nexttail = &head->next; + RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++); rcu_preempt_start_gp(); /* checks to see if GP needed. */ local_irq_restore(flags); } @@ -747,6 +853,18 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +#ifdef CONFIG_RCU_TRACE + +/* + * Because preemptible RCU does not exist, it is not necessary to + * dump out its statistics. + */ +static void show_tiny_preempt_stats(struct seq_file *m) +{ +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + /* * Because preemptible RCU does not exist, it is never necessary to * boost preempted RCU readers.
@@ -802,3 +920,97 @@ void __init rcu_scheduler_starting(void) #else /* #ifdef CONFIG_RCU_BOOST */ #define RCU_BOOST_PRIO 1 #endif /* #else #ifdef CONFIG_RCU_BOOST */ + +#ifdef CONFIG_RCU_TRACE + +#ifdef CONFIG_RCU_BOOST + +static void rcu_initiate_boost_trace(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks == NULL) + rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; + else if (rcu_preempt_ctrlblk.boost_tasks != NULL) + rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; + else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) + rcu_preempt_ctrlblk.n_normal_balk_boosted++; + else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) + rcu_preempt_ctrlblk.n_normal_balk_notyet++; + else + rcu_preempt_ctrlblk.n_normal_balk_nos++; +} + +static void rcu_initiate_exp_boost_trace(void) +{ + if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) + rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; + else + rcu_preempt_ctrlblk.n_exp_balk_nos++; +} + +#endif /* #ifdef CONFIG_RCU_BOOST */ + +static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) +{ + unsigned long flags; + + raw_local_irq_save(flags); + rcp->qlen -= n; + raw_local_irq_restore(flags); +} + +/* + * Dump statistics for TINY_RCU, such as they are. 
+ */ +static int show_tiny_stats(struct seq_file *m, void *unused) +{ + show_tiny_preempt_stats(m); + seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); + seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); + return 0; +} + +static int show_tiny_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_tiny_stats, NULL); +} + +static const struct file_operations show_tiny_stats_fops = { + .owner = THIS_MODULE, + .open = show_tiny_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; + +static int __init rcutiny_trace_init(void) +{ + struct dentry *retval; + + rcudir = debugfs_create_dir("rcu", NULL); + if (!rcudir) + goto free_out; + retval = debugfs_create_file("rcudata", 0444, rcudir, + NULL, &show_tiny_stats_fops); + if (!retval) + goto free_out; + return 0; +free_out: + debugfs_remove_recursive(rcudir); + return 1; +} + +static void __exit rcutiny_trace_cleanup(void) +{ + debugfs_remove_recursive(rcudir); +} + +module_init(rcutiny_trace_init); +module_exit(rcutiny_trace_cleanup); + +MODULE_AUTHOR("Paul E. McKenney"); +MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); +MODULE_LICENSE("GPL"); + +#endif /* #ifdef CONFIG_RCU_TRACE */ -- cgit v1.2.3