From 43918f2bf4806675943416d539d9d5e4d585ebff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:00 -0700 Subject: signals: remove 'handler' parameter to tracehook functions Container-init must behave like global-init to processes within the container and hence it must be immune to unhandled fatal signals from within the container (i.e SIG_DFL signals that terminate the process). But the same container-init must behave like a normal process to processes in ancestor namespaces and so if it receives the same fatal signal from a process in ancestor namespace, the signal must be processed. Implementing these semantics requires that send_signal() determine pid namespace of the sender but since signals can originate from workqueues/ interrupt-handlers, determining pid namespace of sender may not always be possible or safe. This patchset implements the design/simplified semantics suggested by Oleg Nesterov. The simplified semantics for container-init are: - container-init must never be terminated by a signal from a descendant process. - container-init must never be immune to SIGKILL from an ancestor namespace (so a process in parent namespace must always be able to terminate a descendant container). - container-init may be immune to unhandled fatal signals (like SIGUSR1) even if they are from ancestor namespace. SIGKILL/SIGSTOP are the only reliable signals to a container-init from ancestor namespace. This patch: Based on an earlier patch submitted by Oleg Nesterov and comments from Roland McGrath (http://lkml.org/lkml/2008/11/19/258). The handler parameter is currently unused in the tracehook functions. Besides, the tracehook functions are called with siglock held, so the functions can check the handler if they later need to. Removing the parameter simplifies changes to sig_ignored() in a follow-on patch. Signed-off-by: Sukadev Bhattiprolu Acked-by: Roland McGrath Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Cc: Daniel Lezcano Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 1c8814481a11..92a1ab004498 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -74,7 +74,7 @@ static int sig_ignored(struct task_struct *t, int sig) /* * Tracers may want to know about even ignored signals. */ - return !tracehook_consider_ignored_signal(t, sig, handler); + return !tracehook_consider_ignored_signal(t, sig); } /* @@ -318,7 +318,7 @@ int unhandled_signal(struct task_struct *tsk, int sig) return 1; if (handler != SIG_IGN && handler != SIG_DFL) return 0; - return !tracehook_consider_fatal_signal(tsk, sig, handler); + return !tracehook_consider_fatal_signal(tsk, sig); } @@ -777,7 +777,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || - !tracehook_consider_fatal_signal(t, sig, SIG_DFL))) { + !tracehook_consider_fatal_signal(t, sig))) { /* * This signal will be fatal to the whole group. */ -- cgit v1.2.3 From f008faff0e2777c8b3fe853891b774ca465938d8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:02 -0700 Subject: signals: protect init from unwanted signals more (This is a modified version of the patch submitted by Oleg Nesterov http://lkml.org/lkml/2008/11/18/249 and tries to address comments that came up in that discussion) init ignores the SIG_DFL signals but we queue them anyway, including SIGKILL. This is mostly OK, the signal will be dropped silently when dequeued, but the pending SIGKILL has 2 bad implications: - it implies fatal_signal_pending(), so we confuse things like wait_for_completion_killable/lock_page_killable. 
- for the sub-namespace inits, the pending SIGKILL can mask (legacy_queue) the subsequent SIGKILL from the parent namespace which must kill cinit reliably. (preparation, cinits don't have SIGNAL_UNKILLABLE yet) The patch can't help when init is ptraced, but ptracing of init is not "safe" anyway. Signed-off-by: Sukadev Bhattiprolu Acked-by: Roland McGrath Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 92a1ab004498..8bf7a40e5c71 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -55,10 +55,21 @@ static int sig_handler_ignored(void __user *handler, int sig) (handler == SIG_DFL && sig_kernel_ignore(sig)); } -static int sig_ignored(struct task_struct *t, int sig) +static int sig_task_ignored(struct task_struct *t, int sig) { void __user *handler; + handler = sig_handler(t, sig); + + if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && + handler == SIG_DFL) + return 1; + + return sig_handler_ignored(handler, sig); +} + +static int sig_ignored(struct task_struct *t, int sig) +{ /* * Blocked signals are never ignored, since the * signal handler may change by the time it is @@ -67,8 +78,7 @@ static int sig_ignored(struct task_struct *t, int sig) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - handler = sig_handler(t, sig); - if (!sig_handler_ignored(handler, sig)) + if (!sig_task_ignored(t, sig)) return 0; /* -- cgit v1.2.3 From 7978b567d31555fc828b8f945c605ad29e117b22 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:04 -0700 Subject: signals: add from_ancestor_ns parameter to send_signal() send_signal() (or its helper) needs to determine the pid namespace of the sender. 
But a signal sent via kill_pid_info_as_uid() comes from within the kernel and send_signal() does not need to determine the pid namespace of the sender. So define a helper for send_signal() which takes an additional parameter, 'from_ancestor_ns' and have kill_pid_info_as_uid() use that helper directly. The 'from_ancestor_ns' parameter will be used in a follow-on patch. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 8bf7a40e5c71..7b6de962a1af 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -823,8 +823,8 @@ static inline int legacy_queue(struct sigpending *signals, int sig) return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } -static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - int group) +static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, + int group, int from_ancestor_ns) { struct sigpending *pending; struct sigqueue *q; @@ -899,6 +899,12 @@ out_set: return 0; } +static int send_signal(int sig, struct siginfo *info, struct task_struct *t, + int group) +{ + return __send_signal(sig, info, t, group, 0); +} + int print_fatal_signals; static void print_fatal_signal(struct pt_regs *regs, int signr) @@ -1143,7 +1149,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, if (sig && p->sighand) { unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); - ret = __group_send_sig_info(sig, info, p); + ret = __send_signal(sig, info, p, 1, 0); spin_unlock_irqrestore(&p->sighand->siglock, flags); } out_unlock: -- cgit v1.2.3 From 921cf9f63089c7442d44083477620132f4cea066 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:05 -0700 Subject: signals: protect 
cinit from unblocked SIG_DFL signals Drop early any SIG_DFL or SIG_IGN signals to container-init from within the same container. But queue SIGSTOP and SIGKILL to the container-init if they are from an ancestor container. Blocked, fatal signals (i.e when SIG_DFL is to terminate) from within the container can still terminate the container-init. That will be addressed in the next patch. Note: To be bisect-safe, SIGNAL_UNKILLABLE will be set for container-inits in a follow-on patch. Until then, this patch is just a preparatory step. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 7b6de962a1af..fb19aae2363b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -55,20 +55,21 @@ static int sig_handler_ignored(void __user *handler, int sig) (handler == SIG_DFL && sig_kernel_ignore(sig)); } -static int sig_task_ignored(struct task_struct *t, int sig) +static int sig_task_ignored(struct task_struct *t, int sig, + int from_ancestor_ns) { void __user *handler; handler = sig_handler(t, sig); if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && - handler == SIG_DFL) + handler == SIG_DFL && !from_ancestor_ns) return 1; return sig_handler_ignored(handler, sig); } -static int sig_ignored(struct task_struct *t, int sig) +static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) { /* * Blocked signals are never ignored, since the @@ -78,7 +79,7 @@ static int sig_ignored(struct task_struct *t, int sig) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - if (!sig_task_ignored(t, sig)) + if (!sig_task_ignored(t, sig, from_ancestor_ns)) return 0; /* @@ -634,7 +635,7 @@ static int check_kill_permission(int sig, struct siginfo 
*info, * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ -static int prepare_signal(int sig, struct task_struct *p) +static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) { struct signal_struct *signal = p->signal; struct task_struct *t; @@ -718,7 +719,7 @@ static int prepare_signal(int sig, struct task_struct *p) } } - return !sig_ignored(p, sig); + return !sig_ignored(p, sig, from_ancestor_ns); } /* @@ -832,7 +833,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, trace_sched_signal_send(sig, t); assert_spin_locked(&t->sighand->siglock); - if (!prepare_signal(sig, t)) + + if (!prepare_signal(sig, t, from_ancestor_ns)) return 0; pending = group ? &t->signal->shared_pending : &t->pending; @@ -902,7 +904,15 @@ out_set: static int send_signal(int sig, struct siginfo *info, struct task_struct *t, int group) { - return __send_signal(sig, info, t, group, 0); + int from_ancestor_ns = 0; + +#ifdef CONFIG_PID_NS + if (!is_si_special(info) && SI_FROMUSER(info) && + task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0) + from_ancestor_ns = 1; +#endif + + return __send_signal(sig, info, t, group, from_ancestor_ns); } int print_fatal_signals; @@ -1336,7 +1346,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) goto ret; ret = 1; /* the signal is ignored */ - if (!prepare_signal(sig, t)) + if (!prepare_signal(sig, t, 0)) goto out; ret = 0; -- cgit v1.2.3 From b3bfa0cba867f23365b81658b47efd906830879b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:08 -0700 Subject: signals: protect cinit from blocked fatal signals Normally SIG_DFL signals to global and container-init are dropped early. But if a signal is blocked when it is posted, we cannot drop the signal since the receiver may install a handler before unblocking the signal. 
Once this signal is queued however, the receiver container-init has no way of knowing if the signal was sent from an ancestor or descendant namespace. This patch ensures that container-init drops all SIG_DFL signals in get_signal_to_deliver() except SIGKILL/SIGSTOP. If SIGSTOP/SIGKILL originate from a descendant of container-init they are never queued (i.e dropped in sig_ignored() in an earlier patch). If SIGSTOP/SIGKILL originate from parent namespace, the signal is queued and container-init processes the signal. IOW, if get_signal_to_deliver() sees a sig_kernel_only() signal for global or container-init, the signal must have been generated internally or must have come from an ancestor ns and we process the signal. Further, the signal_group_exit() check was needed to cover the case of a multi-threaded init sending SIGKILL to other threads when doing an exit() or exec(). But since the new sig_kernel_only() check covers the SIGKILL, the signal_group_exit() check is no longer needed and can be removed. Finally, now that we have all pieces in place, set SIGNAL_UNKILLABLE for container-inits. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. 
Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 ++ kernel/signal.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/signal.c') diff --git a/kernel/fork.c b/kernel/fork.c index d7eb727eb535..adbea16ec649 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -841,6 +841,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; + if (clone_flags & CLONE_NEWPID) + sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; diff --git a/kernel/signal.c b/kernel/signal.c index fb19aae2363b..ba3da25f0eea 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1870,9 +1870,16 @@ relock: /* * Global init gets no signals it doesn't want. + * Container-init gets no signals it doesn't want from same + * container. + * + * Note that if global/container-init sees a sig_kernel_only() + * signal here, the signal must have been generated internally + * or must have come from an ancestor namespace. In either + * case, the signal cannot be dropped. */ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && - !signal_group_exit(signal)) + !sig_kernel_only(signr)) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3 From 6588c1e3ff01418acafd938db0740e3477dc8cb7 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:09 -0700 Subject: signals: SI_USER: Masquerade si_pid when crossing pid ns boundary When sending a signal to a descendant namespace, set ->si_pid to 0 since the sender does not have a pid in the receiver's namespace. Note: - If rt_sigqueueinfo() sets si_code to SI_USER when sending a signal across a pid namespace boundary, the value in ->si_pid will be cleared to 0. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. 
Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index ba3da25f0eea..d8034737db4c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -883,6 +883,8 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; default: copy_siginfo(&q->info, info); + if (from_ancestor_ns) + q->info.si_pid = 0; break; } } else if (!is_si_special(info)) { -- cgit v1.2.3