diff options
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/array.c | 38 | ||||
| -rw-r--r-- | fs/proc/base.c | 351 | ||||
| -rw-r--r-- | fs/proc/generic.c | 41 | ||||
| -rw-r--r-- | fs/proc/inode.c | 8 | ||||
| -rw-r--r-- | fs/proc/internal.h | 2 | ||||
| -rw-r--r-- | fs/proc/mmu.c | 21 | ||||
| -rw-r--r-- | fs/proc/proc_misc.c | 39 | ||||
| -rw-r--r-- | fs/proc/proc_net.c | 9 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 6 | ||||
| -rw-r--r-- | fs/proc/root.c | 85 |
10 files changed, 427 insertions, 173 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index 27b59f5f3bd..eba339ecba2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -77,6 +77,7 @@ #include <linux/cpuset.h> #include <linux/rcupdate.h> #include <linux/delayacct.h> +#include <linux/pid_namespace.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk) TASK_UNINTERRUPTIBLE | TASK_STOPPED | TASK_TRACED)) | - (tsk->exit_state & (EXIT_ZOMBIE | - EXIT_DEAD)); + tsk->exit_state; const char **p = &task_state_array[0]; while (state) { @@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer) struct group_info *group_info; int g; struct fdtable *fdt = NULL; + struct pid_namespace *ns; + pid_t ppid, tpid; + ns = current->nsproxy->pid_ns; rcu_read_lock(); + ppid = pid_alive(p) ? + task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; + tpid = pid_alive(p) && p->ptrace ? + task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0; buffer += sprintf(buffer, "State:\t%s\n" "Tgid:\t%d\n" @@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer) "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), - p->tgid, p->pid, - pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, - pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, + task_tgid_nr_ns(p, ns), + task_pid_nr_ns(p, ns), + ppid, tpid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -351,7 +358,8 @@ static cputime_t task_utime(struct task_struct *p) } utime = (clock_t)temp; - return clock_t_to_cputime(utime); + p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); + return p->prev_utime; } static cputime_t task_stime(struct task_struct *p) @@ -366,7 +374,8 @@ static cputime_t task_stime(struct task_struct *p) stime = nsec_to_clock_t(p->se.sum_exec_runtime) - cputime_to_clock_t(task_utime(p)); - return clock_t_to_cputime(stime); + p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); + return p->prev_stime; } #endif @@ -394,6 +403,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; + struct pid_namespace *ns; + + ns = current->nsproxy->pid_ns; state = *get_task_state(task); vsize = eip = esp = 0; @@ -416,7 +428,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) struct signal_struct *sig = task->signal; if (sig->tty) { - tty_pgrp = pid_nr(sig->tty->pgrp); + tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); tty_nr = new_encode_dev(tty_devnum(sig->tty)); } @@ -446,12 +458,12 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) maj_flt += sig->maj_flt; utime = cputime_add(utime, sig->utime); stime = cputime_add(stime, sig->stime); - gtime += cputime_add(gtime, sig->gtime); + gtime = cputime_add(gtime, sig->gtime); } - sid = signal_session(sig); - pgid = process_group(task); - ppid = rcu_dereference(task->real_parent)->tgid; + sid = task_session_nr_ns(task, ns); + pgid = task_pgrp_nr_ns(task, ns); + ppid = task_ppid_nr_ns(task, ns); unlock_task_sighand(task, &flags); } @@ -483,7 +495,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", - task->pid, + task_pid_nr_ns(task, ns), tcomm, state, ppid, diff --git a/fs/proc/base.c b/fs/proc/base.c index e5d0953d4db..a17c2685907 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -63,16 +63,19 @@ #include <linux/mm.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> +#include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> #include <linux/elf.h> +#include <linux/pid_namespace.h> #include "internal.h" /* NOTE: @@ -199,27 +202,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ security_ptrace(current,task) == 0)) -static int proc_pid_environ(struct task_struct *task, char * buffer) -{ - int res = 0; - struct mm_struct *mm = get_task_mm(task); - if (mm) { - unsigned int len; - - res = -ESRCH; - if (!ptrace_may_attach(task)) - goto out; - - len = mm->env_end - mm->env_start; - if (len > PAGE_SIZE) - len = PAGE_SIZE; - res = access_process_vm(task, mm->env_start, buffer, len, 0); -out: - mmput(mm); - } - return res; -} - static int proc_pid_cmdline(struct task_struct *task, char * buffer) { int res = 0; @@ -322,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer) return sprintf(buffer, "%lu\n", points); } +struct limit_names { + char *name; + char *unit; +}; + +static const struct limit_names lnames[RLIM_NLIMITS] = { + [RLIMIT_CPU] = {"Max cpu time", "ms"}, + [RLIMIT_FSIZE] = {"Max file size", "bytes"}, + [RLIMIT_DATA] = {"Max data size", "bytes"}, + [RLIMIT_STACK] = {"Max stack size", "bytes"}, + [RLIMIT_CORE] = {"Max core file size", "bytes"}, + [RLIMIT_RSS] = {"Max resident set", "bytes"}, + [RLIMIT_NPROC] = {"Max processes", "processes"}, + [RLIMIT_NOFILE] = {"Max open files", "files"}, + [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, + [RLIMIT_AS] = {"Max address space", "bytes"}, + [RLIMIT_LOCKS] = {"Max file locks", "locks"}, + [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, + [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, + [RLIMIT_NICE] = {"Max nice priority", NULL}, + [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, +}; + +/* Display limits for a process */ +static int proc_pid_limits(struct task_struct *task, char *buffer) +{ + unsigned int i; + int count = 0; + unsigned long flags; + char *bufptr = buffer; + + struct rlimit rlim[RLIM_NLIMITS]; + + rcu_read_lock(); + if (!lock_task_sighand(task,&flags)) { + rcu_read_unlock(); + return 0; + } + memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); + unlock_task_sighand(task, &flags); + rcu_read_unlock(); + + /* + * print the file header + */ + count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", + "Limit", "Soft Limit", "Hard Limit", "Units"); + + for (i = 0; i < RLIM_NLIMITS; i++) { + if (rlim[i].rlim_cur == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-25s %-20s ", + lnames[i].name, "unlimited"); + else + count += sprintf(&bufptr[count], "%-25s %-20lu ", + lnames[i].name, rlim[i].rlim_cur); + + if (rlim[i].rlim_max == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-20s ", "unlimited"); + else + count += sprintf(&bufptr[count], "%-20lu ", + rlim[i].rlim_max); + + if (lnames[i].unit) + count += sprintf(&bufptr[count], "%-10s\n", + lnames[i].unit); + else + count += sprintf(&bufptr[count], "\n"); + } + + return count; +} + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -370,18 +424,21 @@ struct proc_mounts { static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = get_proc_task(inode); + struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct proc_mounts *p; int ret = -EINVAL; if (task) { - task_lock(task); - if (task->nsproxy) { - ns = task->nsproxy->mnt_ns; + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + ns = nsp->mnt_ns; if (ns) get_mnt_ns(ns); } - task_unlock(task); + rcu_read_unlock(); + put_task_struct(task); } @@ -444,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; + struct nsproxy *nsp; struct mnt_namespace *mnt_ns = NULL; struct task_struct *task = get_proc_task(inode); if (task) { - task_lock(task); - if (task->nsproxy) - mnt_ns = task->nsproxy->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - task_unlock(task); + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + mnt_ns = nsp->mnt_ns; + if (mnt_ns) + get_mnt_ns(mnt_ns); + } + rcu_read_unlock(); + put_task_struct(task); } @@ -492,7 +553,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, count = PROC_BLOCK_SIZE; length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) + if (!(page = __get_free_page(GFP_TEMPORARY))) goto out; length = PROC_I(inode)->op.proc_read(task, (char*)page); @@ -532,7 +593,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, goto out; ret = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -602,7 +663,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, goto out; copied = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -658,6 +719,76 @@ static const struct file_operations proc_mem_operations = { .open = mem_open, }; +static ssize_t environ_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + char *page; + unsigned long src = *ppos; + int ret = -ESRCH; + struct mm_struct *mm; + + if (!task) + goto out_no_task; + + if (!ptrace_may_attach(task)) + goto out; + + ret = -ENOMEM; + page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) + goto out; + + ret = 0; + + mm = get_task_mm(task); + if (!mm) + goto out_free; + + while (count > 0) { + int this_len, retval, max_len; + + this_len = mm->env_end - (mm->env_start + src); + + if (this_len <= 0) + break; + + max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; + this_len = (this_len > max_len) ? max_len : this_len; + + retval = access_process_vm(task, (mm->env_start + src), + page, this_len, 0); + + if (retval <= 0) { + ret = retval; + break; + } + + if (copy_to_user(buf, page, retval)) { + ret = -EFAULT; + break; + } + + ret += retval; + src += retval; + buf += retval; + count -= retval; + } + *ppos = src; + + mmput(mm); +out_free: + free_page((unsigned long) page); +out: + put_task_struct(task); +out_no_task: + return ret; +} + +static const struct file_operations proc_environ_operations = { + .read = environ_read, +}; + static ssize_t oom_adjust_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -788,7 +919,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, /* No partial writes. */ return -EINVAL; } - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) return -ENOMEM; length = -EFAULT; @@ -954,7 +1085,8 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, char __user *buffer, int buflen) { struct inode * inode; - char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; + char *tmp = (char*)__get_free_page(GFP_TEMPORARY); + char *path; int len; if (!tmp) @@ -1387,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent, struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *p = get_proc_task(inode); - unsigned int fd, tid, ino; + unsigned int fd, ino; int retval; struct files_struct * files; struct fdtable *fdt; @@ -1396,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, if (!p) goto out_no_task; retval = 0; - tid = p->pid; fd = filp->f_pos; switch (fd) { @@ -1631,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp, const struct pid_entry *ents, unsigned int nents) { int i; - int pid; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); @@ -1644,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp, goto out_no_task; ret = 0; - pid = task->pid; i = filp->f_pos; switch (i) { case 0: @@ -1726,7 +1855,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out; length = -ENOMEM; - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -1878,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", task_tgid_vnr(current)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", task_tgid_vnr(current)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2048,9 +2177,10 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - INF("environ", S_IRUSR, pid_environ), + REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif @@ -2080,9 +2210,12 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -2122,48 +2255,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .setattr = proc_setattr, }; -/** - * proc_flush_task - Remove dcache entries for @task from the /proc dcache. - * - * @task: task that should be flushed. - * - * Looks in the dcache for - * /proc/@pid - * /proc/@tgid/task/@pid - * if either directory is present flushes it and all of it'ts children - * from the dcache. - * - * It is safe and reasonable to cache /proc entries for a task until - * that task exits. After that they just clog up the dcache with - * useless entries, possibly causing useful dcache entries to be - * flushed instead. This routine is proved to flush those useless - * dcache entries at process exit time. - * - * NOTE: This routine is just an optimization so it does not guarantee - * that no dcache entries will exist at process exit time it - * just makes it very unlikely that any will persist. - */ -void proc_flush_task(struct task_struct *task) +static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; char buf[PROC_NUMBUF]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); - dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", pid); + dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (thread_group_leader(task)) + if (tgid == 0) goto out; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); - leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", tgid); + leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -2174,7 +2286,7 @@ void proc_flush_task(struct task_struct *task) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { shrink_dcache_parent(dentry); @@ -2189,6 +2301,52 @@ out: return; } +/** + * proc_flush_task - Remove dcache entries for @task from the /proc dcache. + * @task: task that should be flushed. + * + * When flushing dentries from proc, one needs to flush them from global + * proc (proc_mnt) and from all the namespaces' procs this task was seen + * in. This call is supposed to do all of this job. + * + * Looks in the dcache for + * /proc/@pid + * /proc/@tgid/task/@pid + * if either directory is present flushes it and all of it'ts children + * from the dcache. + * + * It is safe and reasonable to cache /proc entries for a task until + * that task exits. After that they just clog up the dcache with + * useless entries, possibly causing useful dcache entries to be + * flushed instead. This routine is proved to flush those useless + * dcache entries at process exit time. + * + * NOTE: This routine is just an optimization so it does not guarantee + * that no dcache entries will exist at process exit time it + * just makes it very unlikely that any will persist. + */ + +void proc_flush_task(struct task_struct *task) +{ + int i; + struct pid *pid, *tgid = NULL; + struct upid *upid; + + pid = task_pid(task); + if (thread_group_leader(task)) + tgid = task_tgid(task); + + for (i = 0; i <= pid->level; i++) { + upid = &pid->numbers[i]; + proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, + tgid ? tgid->numbers[i].nr : 0); + } + + upid = &pid->numbers[pid->level]; + if (upid->nr == 1) + pid_ns_release_proc(upid->ns); +} + static struct dentry *proc_pid_instantiate(struct inode *dir, struct dentry * dentry, struct task_struct *task, const void *ptr) @@ -2224,6 +2382,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; unsigned tgid; + struct pid_namespace *ns; result = proc_base_lookup(dir, dentry); if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) @@ -2233,8 +2392,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (tgid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tgid); + task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2251,7 +2411,8 @@ out: * Find the first task with tgid >= tgid * */ -static struct task_struct *next_tgid(unsigned int tgid) +static struct task_struct *next_tgid(unsigned int tgid, + struct pid_namespace *ns) { struct task_struct *task; struct pid *pid; @@ -2259,9 +2420,9 @@ static struct task_struct *next_tgid(unsigned int tgid) rcu_read_lock(); retry: task = NULL; - pid = find_ge_pid(tgid); + pid = find_ge_pid(tgid, ns); if (pid) { - tgid = pid->nr + 1; + tgid = pid_nr_ns(pid, ns) + 1; task = pid_task(pid, PIDTYPE_PID); /* What we to know is if the pid we have find is the * pid of a thread_group_leader. Testing for task @@ -2301,6 +2462,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); struct task_struct *task; int tgid; + struct pid_namespace *ns; if (!reaper) goto out_no_task; @@ -2311,11 +2473,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) goto out; } + ns = filp->f_dentry->d_sb->s_fs_info; tgid = filp->f_pos - TGID_OFFSET; - for (task = next_tgid(tgid); + for (task = next_tgid(tgid, ns); task; - put_task_struct(task), task = next_tgid(tgid + 1)) { - tgid = task->pid; + put_task_struct(task), task = next_tgid(tgid + 1, ns)) { + tgid = task_pid_nr_ns(task, ns); filp->f_pos = tgid + TGID_OFFSET; if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { put_task_struct(task); @@ -2335,9 +2498,10 @@ out_no_task: static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - INF("environ", S_IRUSR, pid_environ), + REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif @@ -2366,9 +2530,12 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -2436,6 +2603,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; + struct pid_namespace *ns; if (!leader) goto out_no_task; @@ -2444,14 +2612,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (tid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tid); + task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; - if (leader->tgid != task->tgid) + if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dir, dentry, task, NULL); @@ -2476,14 +2645,14 @@ out_no_task: * threads past it. */ static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr) + int tid, int nr, struct pid_namespace *ns) { struct task_struct *pos; rcu_read_lock(); /* Attempt to start with the pid of a thread */ if (tid && (nr > 0)) { - pos = find_task_by_pid(tid); + pos = find_task_by_pid_ns(tid, ns); if (pos && (pos->group_leader == leader)) goto found; } @@ -2552,6 +2721,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi ino_t ino; int tid; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ + struct pid_namespace *ns; task = get_proc_task(inode); if (!task) @@ -2585,16 +2755,17 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - tid = filp->f_version; + ns = filp->f_dentry->d_sb->s_fs_info; + tid = (int)filp->f_version; filp->f_version = 0; - for (task = first_tid(leader, tid, pos - 2); + for (task = first_tid(leader, tid, pos - 2, ns); task; task = next_tid(task), pos++) { - tid = task->pid; + tid = task_pid_nr_ns(task, ns); if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - filp->f_version = tid; + filp->f_version = (u64)tid; put_task_struct(task); break; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b5e7155d30d..a9806bc21ec 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -74,7 +74,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, nbytes = MAX_NON_LFS - pos; dp = PDE(inode); - if (!(page = (char*) __get_free_page(GFP_KERNEL))) + if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) return -ENOMEM; while ((nbytes > 0) && !eof) { @@ -561,28 +561,33 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp static void proc_kill_inodes(struct proc_dir_entry *de) { struct list_head *p; - struct super_block *sb = proc_mnt->mnt_sb; + struct super_block *sb; /* * Actually it's a partial revoke(). */ - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file * filp = list_entry(p, struct file, f_u.fu_list); - struct dentry * dentry = filp->f_path.dentry; - struct inode * inode; - const struct file_operations *fops; - - if (dentry->d_op != &proc_dentry_operations) - continue; - inode = dentry->d_inode; - if (PDE(inode) != de) - continue; - fops = filp->f_op; - filp->f_op = NULL; - fops_put(fops); + spin_lock(&sb_lock); + list_for_each_entry(sb, &proc_fs_type.fs_supers, s_instances) { + file_list_lock(); + list_for_each(p, &sb->s_files) { + struct file *filp = list_entry(p, struct file, + f_u.fu_list); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode; + const struct file_operations *fops; + + if (dentry->d_op != &proc_dentry_operations) + continue; + inode = dentry->d_inode; + if (PDE(inode) != de) + continue; + fops = filp->f_op; + filp->f_op = NULL; + fops_put(fops); + } + file_list_unlock(); } - file_list_unlock(); + spin_unlock(&sb_lock); } static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 0e4d37c93ee..abe6a3f0436 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -107,7 +107,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +static void init_once(struct kmem_cache * cachep, void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; @@ -119,10 +119,8 @@ int __init proc_init_inodecache(void) proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + SLAB_MEM_SPREAD|SLAB_PANIC), init_once); - if (proc_inode_cachep == NULL) - return -ENOMEM; return 0; } @@ -450,7 +448,7 @@ out_mod: return NULL; } -int proc_fill_super(struct super_block *s, void *data, int silent) +int proc_fill_super(struct super_block *s) { struct inode * root_inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1820eb2ef76..1b2b6c6bb47 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -78,3 +78,5 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } + +extern struct file_system_type proc_fs_type; diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c index 25d2d9c6e32..8ae221dfd01 100644 --- a/fs/proc/mmu.c +++ b/fs/proc/mmu.c @@ -8,27 +8,10 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/mman.h> -#include <linux/proc_fs.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/pagemap.h> -#include <linux/swap.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/seq_file.h> -#include <linux/hugetlb.h> +#include <linux/spinlock.h> #include <linux/vmalloc.h> -#include <asm/uaccess.h> +#include <linux/highmem.h> #include <asm/pgtable.h> -#include <asm/tlb.h> -#include <asm/div64.h> #include "internal.h" void get_vmalloc_info(struct vmalloc_info *vmi) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index b872a01ad3a..e0d064e9764 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -66,7 +66,6 @@ extern int get_stram_list(char *); extern int get_filesystem_list(char *); extern int get_exec_domain_list(char *); extern int get_dma_list(char *); -extern int get_locks_status (char *, char **, off_t, int); static int proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) @@ -92,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off, LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid); + nr_running(), nr_threads, + task_active_pid_ns(current)->last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -230,6 +230,19 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; +extern struct seq_operations pagetypeinfo_op; +static int pagetypeinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pagetypeinfo_op); +} + +static const struct file_operations pagetypeinfo_file_ops = { + .open = pagetypeinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + extern struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { @@ -514,11 +527,8 @@ static int show_stat(struct seq_file *p, void *v) } seq_printf(p, "intr %llu", (unsigned long long)sum); -#ifndef CONFIG_SMP - /* Touches too many cache lines on SMP setups */ for (i = 0; i < NR_IRQS; i++) seq_printf(p, " %u", per_irq_sum[i]); -#endif seq_printf(p, "\nctxt %llu\n" @@ -624,16 +634,18 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -static int locks_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int locks_open(struct inode *inode, struct file *filp) { - int len = get_locks_status(page, start, off, count); - - if (len < count) - *eof = 1; - return len; + return seq_open(filp, &locks_seq_operations); } +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -691,7 +703,6 @@ void __init proc_misc_init(void) #endif {"filesystems", filesystems_read_proc}, {"cmdline", cmdline_read_proc}, - {"locks", locks_read_proc}, {"execdomains", execdomains_read_proc}, {NULL,} }; @@ -709,6 +720,7 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; } #endif + create_seq_entry("locks", 0, &proc_locks_operations); create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK @@ -723,6 +735,7 @@ void __init proc_misc_init(void) #endif #endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); + create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 2e91fb756e9..131f9c68be5 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -26,13 +26,6 @@ #include "internal.h" -struct proc_dir_entry *proc_net_create(struct net *net, - const char *name, mode_t mode, get_info_t *get_info) -{ - return create_proc_info_entry(name,mode, net->proc_net, get_info); -} -EXPORT_SYMBOL_GPL(proc_net_create); - struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { @@ -185,7 +178,7 @@ static __net_exit void proc_net_ns_exit(struct net *net) kfree(net->proc_net_root); } -struct pernet_operations __net_initdata proc_net_ns_ops = { +static struct pernet_operations __net_initdata proc_net_ns_ops = { .init = proc_net_ns_init, .exit = proc_net_ns_exit, }; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 680c429bfa2..4e57fcf8598 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -171,7 +171,8 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf, struct dentry *dentry = filp->f_dentry; struct ctl_table_header *head; struct ctl_table *table; - ssize_t error, res; + ssize_t error; + size_t res; table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); /* Has the sysctl entry disappeared on us? */ @@ -209,7 +210,8 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, struct dentry *dentry = filp->f_dentry; struct ctl_table_header *head; struct ctl_table *table; - ssize_t error, res; + ssize_t error; + size_t res; table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); /* Has the sysctl entry disappeared on us? */ diff --git a/fs/proc/root.c b/fs/proc/root.c index cf3046638b0..1f86bb860e0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,32 +18,90 @@ #include <linux/bitops.h> #include <linux/smp_lock.h> #include <linux/mount.h> +#include <linux/pid_namespace.h> #include "internal.h" struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; +static int proc_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_set_super(struct super_block *sb, void *data) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + return set_anon_super(sb, NULL); +} + static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { + int err; + struct super_block *sb; + struct pid_namespace *ns; + struct proc_inode *ei; + if (proc_mnt) { /* Seed the root directory with a pid so it doesn't need * to be special in base.c. I would do this earlier but * the only task alive when /proc is mounted the first time * is the init_task and it doesn't have any pids. */ - struct proc_inode *ei; ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); if (!ei->pid) ei->pid = find_get_pid(1); } - return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); + + if (flags & MS_KERNMOUNT) + ns = (struct pid_namespace *)data; + else + ns = current->nsproxy->pid_ns; + + sb = sget(fs_type, proc_test_super, proc_set_super, ns); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + sb->s_flags = flags; + err = proc_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + ei = PROC_I(sb->s_root->d_inode); + if (!ei->pid) { + rcu_read_lock(); + ei->pid = get_pid(find_pid_ns(1, ns)); + rcu_read_unlock(); + } + + sb->s_flags |= MS_ACTIVE; + ns->proc_mnt = mnt; + } + + return simple_set_mnt(mnt, sb); } -static struct file_system_type proc_fs_type = { +static void proc_kill_sb(struct super_block *sb) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)sb->s_fs_info; + kill_anon_super(sb); + put_pid_ns(ns); +} + +struct file_system_type proc_fs_type = { .name = "proc", .get_sb = proc_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = proc_kill_sb, }; void __init proc_root_init(void) @@ -54,12 +112,13 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - proc_mnt = kern_mount(&proc_fs_type); + proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); err = PTR_ERR(proc_mnt); if (IS_ERR(proc_mnt)) { unregister_filesystem(&proc_fs_type); return; } + proc_misc_init(); proc_net_init(); @@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = { .parent = &proc_root, }; +int pid_ns_prepare_proc(struct pid_namespace *ns) +{ + struct vfsmount *mnt; + + mnt = kern_mount_data(&proc_fs_type, ns); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + return 0; +} + +void pid_ns_release_proc(struct pid_namespace *ns) +{ + mntput(ns->proc_mnt); +} + EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); |
