summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/prctl.h3
-rw-r--r--include/linux/sched.h12
-rw-r--r--kernel/exit.c33
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/sys.c8
5 files changed, 54 insertions, 5 deletions
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a0413ac3abe8..e0cfec2490aa 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -121,4 +121,7 @@
#define PR_SET_PTRACER 0x59616d61
# define PR_SET_PTRACER_ANY ((unsigned long)-1)
+#define PR_SET_CHILD_SUBREAPER 36
+#define PR_GET_CHILD_SUBREAPER 37
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0c147a4260a5..0c3854b0d4b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -553,6 +553,18 @@ struct signal_struct {
int group_stop_count;
unsigned int flags; /* see SIGNAL_* flags below */
+ /*
+ * PR_SET_CHILD_SUBREAPER marks a process, like a service
+ * manager, to re-parent orphan (double-forking) child processes
+ * to this process instead of 'init'. The service manager is
+ * able to receive SIGCHLD signals and is able to investigate
+ * the process until it calls wait(). All children of this
+ * process will inherit a flag if they should look for a
+ * child_subreaper process at exit.
+ */
+ unsigned int is_child_subreaper:1;
+ unsigned int has_child_subreaper:1;
+
/* POSIX.1b Interval Timers */
struct list_head posix_timers;
diff --git a/kernel/exit.c b/kernel/exit.c
index 16b07bfac224..456329fd4ea3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -687,11 +687,11 @@ static void exit_mm(struct task_struct * tsk)
}
/*
- * When we die, we re-parent all our children.
- * Try to give them to another thread in our thread
- * group, and if no such member exists, give it to
- * the child reaper process (ie "init") in our pid
- * space.
+ * When we die, we re-parent all our children, and try to:
+ * 1. give them to another thread in our thread group, if such a member exists
+ * 2. give it to the first ancestor process which prctl'd itself as a
+ * child_subreaper for its children (like a service manager)
+ * 3. give it to the init process (PID 1) in our pid namespace
*/
static struct task_struct *find_new_reaper(struct task_struct *father)
__releases(&tasklist_lock)
@@ -722,6 +722,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
* forget_original_parent() must move them somewhere.
*/
pid_ns->child_reaper = init_pid_ns.child_reaper;
+ } else if (father->signal->has_child_subreaper) {
+ struct task_struct *reaper;
+
+ /*
+ * Find the first ancestor marked as child_subreaper.
+ * Note that the code below checks same_thread_group(reaper,
+ * pid_ns->child_reaper). This is what we need to DTRT in a
+ * PID namespace. However we still need the check above, see
+ * http://marc.info/?l=linux-kernel&m=131385460420380
+ */
+ for (reaper = father->real_parent;
+ reaper != &init_task;
+ reaper = reaper->real_parent) {
+ if (same_thread_group(reaper, pid_ns->child_reaper))
+ break;
+ if (!reaper->signal->is_child_subreaper)
+ continue;
+ thread = reaper;
+ do {
+ if (!(thread->flags & PF_EXITING))
+ return reaper;
+ } while_each_thread(reaper, thread);
+ }
}
return pid_ns->child_reaper;
diff --git a/kernel/fork.c b/kernel/fork.c
index 37674ec55cde..b9372a0bff18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1051,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
+ sig->has_child_subreaper = current->signal->has_child_subreaper ||
+ current->signal->is_child_subreaper;
+
mutex_init(&sig->cred_guard_mutex);
return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 888d227fd195..9eb7fcab8df6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1962,6 +1962,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_MM:
error = prctl_set_mm(arg2, arg3, arg4, arg5);
break;
+ case PR_SET_CHILD_SUBREAPER:
+ me->signal->is_child_subreaper = !!arg2;
+ error = 0;
+ break;
+ case PR_GET_CHILD_SUBREAPER:
+ error = put_user(me->signal->is_child_subreaper,
+ (int __user *) arg2);
+ break;
default:
error = -EINVAL;
break;