summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c47
-rw-r--r--fs/proc/base.c356
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/mmu.c21
-rw-r--r--fs/proc/proc_misc.c54
-rw-r--r--fs/proc/root.c83
7 files changed, 422 insertions, 149 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ee4814dd98f..63c95afb561 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -77,6 +77,7 @@
#include <linux/cpuset.h>
#include <linux/rcupdate.h>
#include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
TASK_UNINTERRUPTIBLE |
TASK_STOPPED |
TASK_TRACED)) |
- (tsk->exit_state & (EXIT_ZOMBIE |
- EXIT_DEAD));
+ tsk->exit_state;
const char **p = &task_state_array[0];
while (state) {
@@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer)
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
+ struct pid_namespace *ns;
+ pid_t ppid, tpid;
+ ns = current->nsproxy->pid_ns;
rcu_read_lock();
+ ppid = pid_alive(p) ?
+ task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
+ tpid = pid_alive(p) && p->ptrace ?
+ task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
buffer += sprintf(buffer,
"State:\t%s\n"
"Tgid:\t%d\n"
@@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer)
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
- p->tgid, p->pid,
- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
- pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+ task_tgid_nr_ns(p, ns),
+ task_pid_nr_ns(p, ns),
+ ppid, tpid,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
@@ -370,6 +377,11 @@ static cputime_t task_stime(struct task_struct *p)
}
#endif
+static cputime_t task_gtime(struct task_struct *p)
+{
+ return p->gtime;
+}
+
static int do_task_stat(struct task_struct *task, char *buffer, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -385,9 +397,13 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
cputime_t cutime, cstime, utime, stime;
+ cputime_t cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
+ struct pid_namespace *ns;
+
+ ns = current->nsproxy->pid_ns;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -403,13 +419,14 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
sigemptyset(&sigign);
sigemptyset(&sigcatch);
cutime = cstime = utime = stime = cputime_zero;
+ cgtime = gtime = cputime_zero;
rcu_read_lock();
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
if (sig->tty) {
- tty_pgrp = pid_nr(sig->tty->pgrp);
+ tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
tty_nr = new_encode_dev(tty_devnum(sig->tty));
}
@@ -420,6 +437,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
cmaj_flt = sig->cmaj_flt;
cutime = sig->cutime;
cstime = sig->cstime;
+ cgtime = sig->cgtime;
rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
/* add up live thread stats at the group level */
@@ -430,6 +448,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
maj_flt += t->maj_flt;
utime = cputime_add(utime, task_utime(t));
stime = cputime_add(stime, task_stime(t));
+ gtime = cputime_add(gtime, task_gtime(t));
t = next_thread(t);
} while (t != task);
@@ -437,11 +456,12 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
maj_flt += sig->maj_flt;
utime = cputime_add(utime, sig->utime);
stime = cputime_add(stime, sig->stime);
+ gtime = cputime_add(gtime, sig->gtime);
}
- sid = signal_session(sig);
- pgid = process_group(task);
- ppid = rcu_dereference(task->real_parent)->tgid;
+ sid = task_session_nr_ns(task, ns);
+ pgid = task_pgrp_nr_ns(task, ns);
+ ppid = task_ppid_nr_ns(task, ns);
unlock_task_sighand(task, &flags);
}
@@ -454,6 +474,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
maj_flt = task->maj_flt;
utime = task_utime(task);
stime = task_stime(task);
+ gtime = task_gtime(task);
}
/* scale priority and nice values from timeslices to -20..20 */
@@ -471,8 +492,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
- task->pid,
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+ task_pid_nr_ns(task, ns),
tcomm,
state,
ppid,
@@ -516,7 +537,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
task_cpu(task),
task->rt_priority,
task->policy,
- (unsigned long long)delayacct_blkio_ticks(task));
+ (unsigned long long)delayacct_blkio_ticks(task),
+ cputime_to_clock_t(gtime),
+ cputime_to_clock_t(cgtime));
if (mm)
mmput(mm);
return res;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 19489b0d555..aeaf0d0f2f5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -63,16 +63,19 @@
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
+#include <linux/resource.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
+#include <linux/cgroup.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
#include <linux/oom.h>
#include <linux/elf.h>
+#include <linux/pid_namespace.h>
#include "internal.h"
/* NOTE:
@@ -199,27 +202,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
(task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
security_ptrace(current,task) == 0))
-static int proc_pid_environ(struct task_struct *task, char * buffer)
-{
- int res = 0;
- struct mm_struct *mm = get_task_mm(task);
- if (mm) {
- unsigned int len;
-
- res = -ESRCH;
- if (!ptrace_may_attach(task))
- goto out;
-
- len = mm->env_end - mm->env_start;
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
- res = access_process_vm(task, mm->env_start, buffer, len, 0);
-out:
- mmput(mm);
- }
- return res;
-}
-
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
@@ -304,7 +286,7 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
return sprintf(buffer, "%llu %llu %lu\n",
task->sched_info.cpu_time,
task->sched_info.run_delay,
- task->sched_info.pcnt);
+ task->sched_info.pcount);
}
#endif
@@ -322,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
return sprintf(buffer, "%lu\n", points);
}
+struct limit_names {
+ char *name;
+ char *unit;
+};
+
+static const struct limit_names lnames[RLIM_NLIMITS] = {
+ [RLIMIT_CPU] = {"Max cpu time", "ms"},
+ [RLIMIT_FSIZE] = {"Max file size", "bytes"},
+ [RLIMIT_DATA] = {"Max data size", "bytes"},
+ [RLIMIT_STACK] = {"Max stack size", "bytes"},
+ [RLIMIT_CORE] = {"Max core file size", "bytes"},
+ [RLIMIT_RSS] = {"Max resident set", "bytes"},
+ [RLIMIT_NPROC] = {"Max processes", "processes"},
+ [RLIMIT_NOFILE] = {"Max open files", "files"},
+ [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
+ [RLIMIT_AS] = {"Max address space", "bytes"},
+ [RLIMIT_LOCKS] = {"Max file locks", "locks"},
+ [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
+ [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
+ [RLIMIT_NICE] = {"Max nice priority", NULL},
+ [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
+};
+
+/* Display limits for a process */
+static int proc_pid_limits(struct task_struct *task, char *buffer)
+{
+ unsigned int i;
+ int count = 0;
+ unsigned long flags;
+ char *bufptr = buffer;
+
+ struct rlimit rlim[RLIM_NLIMITS];
+
+ rcu_read_lock();
+ if (!lock_task_sighand(task,&flags)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
+ unlock_task_sighand(task, &flags);
+ rcu_read_unlock();
+
+ /*
+ * print the file header
+ */
+ count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+ "Limit", "Soft Limit", "Hard Limit", "Units");
+
+ for (i = 0; i < RLIM_NLIMITS; i++) {
+ if (rlim[i].rlim_cur == RLIM_INFINITY)
+ count += sprintf(&bufptr[count], "%-25s %-20s ",
+ lnames[i].name, "unlimited");
+ else
+ count += sprintf(&bufptr[count], "%-25s %-20lu ",
+ lnames[i].name, rlim[i].rlim_cur);
+
+ if (rlim[i].rlim_max == RLIM_INFINITY)
+ count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+ else
+ count += sprintf(&bufptr[count], "%-20lu ",
+ rlim[i].rlim_max);
+
+ if (lnames[i].unit)
+ count += sprintf(&bufptr[count], "%-10s\n",
+ lnames[i].unit);
+ else
+ count += sprintf(&bufptr[count], "\n");
+ }
+
+ return count;
+}
+
/************************************************************************/
/* Here the fs part begins */
/************************************************************************/
@@ -370,18 +424,21 @@ struct proc_mounts {
static int mounts_open(struct inode *inode, struct file *file)
{
struct task_struct *task = get_proc_task(inode);
+ struct nsproxy *nsp;
struct mnt_namespace *ns = NULL;
struct proc_mounts *p;
int ret = -EINVAL;
if (task) {
- task_lock(task);
- if (task->nsproxy) {
- ns = task->nsproxy->mnt_ns;
+ rcu_read_lock();
+ nsp = task_nsproxy(task);
+ if (nsp) {
+ ns = nsp->mnt_ns;
if (ns)
get_mnt_ns(ns);
}
- task_unlock(task);
+ rcu_read_unlock();
+
put_task_struct(task);
}
@@ -444,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *m = file->private_data;
+ struct nsproxy *nsp;
struct mnt_namespace *mnt_ns = NULL;
struct task_struct *task = get_proc_task(inode);
if (task) {
- task_lock(task);
- if (task->nsproxy)
- mnt_ns = task->nsproxy->mnt_ns;
- if (mnt_ns)
- get_mnt_ns(mnt_ns);
- task_unlock(task);
+ rcu_read_lock();
+ nsp = task_nsproxy(task);
+ if (nsp) {
+ mnt_ns = nsp->mnt_ns;
+ if (mnt_ns)
+ get_mnt_ns(mnt_ns);
+ }
+ rcu_read_unlock();
+
put_task_struct(task);
}
@@ -492,7 +553,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
count = PROC_BLOCK_SIZE;
length = -ENOMEM;
- if (!(page = __get_free_page(GFP_KERNEL)))
+ if (!(page = __get_free_page(GFP_TEMPORARY)))
goto out;
length = PROC_I(inode)->op.proc_read(task, (char*)page);
@@ -532,7 +593,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
goto out;
ret = -ENOMEM;
- page = (char *)__get_free_page(GFP_USER);
+ page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
@@ -602,7 +663,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
goto out;
copied = -ENOMEM;
- page = (char *)__get_free_page(GFP_USER);
+ page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
@@ -658,6 +719,76 @@ static const struct file_operations proc_mem_operations = {
.open = mem_open,
};
+static ssize_t environ_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+ char *page;
+ unsigned long src = *ppos;
+ int ret = -ESRCH;
+ struct mm_struct *mm;
+
+ if (!task)
+ goto out_no_task;
+
+ if (!ptrace_may_attach(task))
+ goto out;
+
+ ret = -ENOMEM;
+ page = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!page)
+ goto out;
+
+ ret = 0;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_free;
+
+ while (count > 0) {
+ int this_len, retval, max_len;
+
+ this_len = mm->env_end - (mm->env_start + src);
+
+ if (this_len <= 0)
+ break;
+
+ max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
+ this_len = (this_len > max_len) ? max_len : this_len;
+
+ retval = access_process_vm(task, (mm->env_start + src),
+ page, this_len, 0);
+
+ if (retval <= 0) {
+ ret = retval;
+ break;
+ }
+
+ if (copy_to_user(buf, page, retval)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret += retval;
+ src += retval;
+ buf += retval;
+ count -= retval;
+ }
+ *ppos = src;
+
+ mmput(mm);
+out_free:
+ free_page((unsigned long) page);
+out:
+ put_task_struct(task);
+out_no_task:
+ return ret;
+}
+
+static const struct file_operations proc_environ_operations = {
+ .read = environ_read,
+};
+
static ssize_t oom_adjust_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -788,7 +919,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
/* No partial writes. */
return -EINVAL;
}
- page = (char*)__get_free_page(GFP_USER);
+ page = (char*)__get_free_page(GFP_TEMPORARY);
if (!page)
return -ENOMEM;
length = -EFAULT;
@@ -954,7 +1085,8 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
char __user *buffer, int buflen)
{
struct inode * inode;
- char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
+ char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
+ char *path;
int len;
if (!tmp)
@@ -1387,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent,
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct task_struct *p = get_proc_task(inode);
- unsigned int fd, tid, ino;
+ unsigned int fd, ino;
int retval;
struct files_struct * files;
struct fdtable *fdt;
@@ -1396,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
if (!p)
goto out_no_task;
retval = 0;
- tid = p->pid;
fd = filp->f_pos;
switch (fd) {
@@ -1631,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp,
const struct pid_entry *ents, unsigned int nents)
{
int i;
- int pid;
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
@@ -1644,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp,
goto out_no_task;
ret = 0;
- pid = task->pid;
i = filp->f_pos;
switch (i) {
case 0:
@@ -1726,7 +1855,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
goto out;
length = -ENOMEM;
- page = (char*)__get_free_page(GFP_USER);
+ page = (char*)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
@@ -1878,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
int buflen)
{
char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", current->tgid);
+ sprintf(tmp, "%d", task_tgid_vnr(current));
return vfs_readlink(dentry,buffer,buflen,tmp);
}
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", current->tgid);
+ sprintf(tmp, "%d", task_tgid_vnr(current));
return ERR_PTR(vfs_follow_link(nd,tmp));
}
@@ -2048,9 +2177,10 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, task),
DIR("fd", S_IRUSR|S_IXUSR, fd),
DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
- INF("environ", S_IRUSR, pid_environ),
+ REG("environ", S_IRUSR, environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+ INF("limits", S_IRUSR, pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
@@ -2080,9 +2210,12 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
+#ifdef CONFIG_CGROUPS
+ REG("cgroup", S_IRUGO, cgroup),
+#endif
INF("oom_score", S_IRUGO, oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
#ifdef CONFIG_AUDITSYSCALL
@@ -2122,48 +2255,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
.setattr = proc_setattr,
};
-/**
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
- *
- * @task: task that should be flushed.
- *
- * Looks in the dcache for
- * /proc/@pid
- * /proc/@tgid/task/@pid
- * if either directory is present flushes it and all of it'ts children
- * from the dcache.
- *
- * It is safe and reasonable to cache /proc entries for a task until
- * that task exits. After that they just clog up the dcache with
- * useless entries, possibly causing useful dcache entries to be
- * flushed instead. This routine is proved to flush those useless
- * dcache entries at process exit time.
- *
- * NOTE: This routine is just an optimization so it does not guarantee
- * that no dcache entries will exist at process exit time it
- * just makes it very unlikely that any will persist.
- */
-void proc_flush_task(struct task_struct *task)
+static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
{
struct dentry *dentry, *leader, *dir;
char buf[PROC_NUMBUF];
struct qstr name;
name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
- dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+ name.len = snprintf(buf, sizeof(buf), "%d", pid);
+ dentry = d_hash_and_lookup(mnt->mnt_root, &name);
if (dentry) {
shrink_dcache_parent(dentry);
d_drop(dentry);
dput(dentry);
}
- if (thread_group_leader(task))
+ if (tgid == 0)
goto out;
name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
- leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+ name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+ leader = d_hash_and_lookup(mnt->mnt_root, &name);
if (!leader)
goto out;
@@ -2174,7 +2286,7 @@ void proc_flush_task(struct task_struct *task)
goto out_put_leader;
name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+ name.len = snprintf(buf, sizeof(buf), "%d", pid);
dentry = d_hash_and_lookup(dir, &name);
if (dentry) {
shrink_dcache_parent(dentry);
@@ -2189,6 +2301,55 @@ out:
return;
}
+/**
+ * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
+ * @task: task that should be flushed.
+ *
+ * When flushing dentries from proc, one needs to flush them from global
+ * proc (proc_mnt) and from all the namespaces' procs this task was seen
+ * in. This call is supposed to do all of this job.
+ *
+ * Looks in the dcache for
+ * /proc/@pid
+ * /proc/@tgid/task/@pid
+ * if either directory is present flushes it and all of it'ts children
+ * from the dcache.
+ *
+ * It is safe and reasonable to cache /proc entries for a task until
+ * that task exits. After that they just clog up the dcache with
+ * useless entries, possibly causing useful dcache entries to be
+ * flushed instead. This routine is proved to flush those useless
+ * dcache entries at process exit time.
+ *
+ * NOTE: This routine is just an optimization so it does not guarantee
+ * that no dcache entries will exist at process exit time it
+ * just makes it very unlikely that any will persist.
+ */
+
+void proc_flush_task(struct task_struct *task)
+{
+ int i, leader;
+ struct pid *pid, *tgid;
+ struct upid *upid;
+
+ leader = thread_group_leader(task);
+ proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0);
+ pid = task_pid(task);
+ if (pid->level == 0)
+ return;
+
+ tgid = task_tgid(task);
+ for (i = 1; i <= pid->level; i++) {
+ upid = &pid->numbers[i];
+ proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
+ leader ? 0 : tgid->numbers[i].nr);
+ }
+
+ upid = &pid->numbers[pid->level];
+ if (upid->nr == 1)
+ pid_ns_release_proc(upid->ns);
+}
+
static struct dentry *proc_pid_instantiate(struct inode *dir,
struct dentry * dentry,
struct task_struct *task, const void *ptr)
@@ -2224,6 +2385,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
struct dentry *result = ERR_PTR(-ENOENT);
struct task_struct *task;
unsigned tgid;
+ struct pid_namespace *ns;
result = proc_base_lookup(dir, dentry);
if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
@@ -2233,8 +2395,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
if (tgid == ~0U)
goto out;
+ ns = dentry->d_sb->s_fs_info;
rcu_read_lock();
- task = find_task_by_pid(tgid);
+ task = find_task_by_pid_ns(tgid, ns);
if (task)
get_task_struct(task);
rcu_read_unlock();
@@ -2251,7 +2414,8 @@ out:
* Find the first task with tgid >= tgid
*
*/
-static struct task_struct *next_tgid(unsigned int tgid)
+static struct task_struct *next_tgid(unsigned int tgid,
+ struct pid_namespace *ns)
{
struct task_struct *task;
struct pid *pid;
@@ -2259,9 +2423,9 @@ static struct task_struct *next_tgid(unsigned int tgid)
rcu_read_lock();
retry:
task = NULL;
- pid = find_ge_pid(tgid);
+ pid = find_ge_pid(tgid, ns);
if (pid) {
- tgid = pid->nr + 1;
+ tgid = pid_nr_ns(pid, ns) + 1;
task = pid_task(pid, PIDTYPE_PID);
/* What we to know is if the pid we have find is the
* pid of a thread_group_leader. Testing for task
@@ -2301,6 +2465,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
struct task_struct *task;
int tgid;
+ struct pid_namespace *ns;
if (!reaper)
goto out_no_task;
@@ -2311,11 +2476,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
goto out;
}
+ ns = filp->f_dentry->d_sb->s_fs_info;
tgid = filp->f_pos - TGID_OFFSET;
- for (task = next_tgid(tgid);
+ for (task = next_tgid(tgid, ns);
task;
- put_task_struct(task), task = next_tgid(tgid + 1)) {
- tgid = task->pid;
+ put_task_struct(task), task = next_tgid(tgid + 1, ns)) {
+ tgid = task_pid_nr_ns(task, ns);
filp->f_pos = tgid + TGID_OFFSET;
if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
put_task_struct(task);
@@ -2335,9 +2501,10 @@ out_no_task:
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, fd),
DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
- INF("environ", S_IRUSR, pid_environ),
+ REG("environ", S_IRUSR, environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+ INF("limits", S_IRUSR, pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
@@ -2366,9 +2533,12 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
+#ifdef CONFIG_CGROUPS
+ REG("cgroup", S_IRUGO, cgroup),
+#endif
INF("oom_score", S_IRUGO, oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
#ifdef CONFIG_AUDITSYSCALL
@@ -2436,6 +2606,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
+ struct pid_namespace *ns;
if (!leader)
goto out_no_task;
@@ -2444,14 +2615,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (tid == ~0U)
goto out;
+ ns = dentry->d_sb->s_fs_info;
rcu_read_lock();
- task = find_task_by_pid(tid);
+ task = find_task_by_pid_ns(tid, ns);
if (task)
get_task_struct(task);
rcu_read_unlock();
if (!task)
goto out;
- if (leader->tgid != task->tgid)
+ if (!same_thread_group(leader, task))
goto out_drop_task;
result = proc_task_instantiate(dir, dentry, task, NULL);
@@ -2476,14 +2648,14 @@ out_no_task:
* threads past it.
*/
static struct task_struct *first_tid(struct task_struct *leader,
- int tid, int nr)
+ int tid, int nr, struct pid_namespace *ns)
{
struct task_struct *pos;
rcu_read_lock();
/* Attempt to start with the pid of a thread */
if (tid && (nr > 0)) {
- pos = find_task_by_pid(tid);
+ pos = find_task_by_pid_ns(tid, ns);
if (pos && (pos->group_leader == leader))
goto found;
}
@@ -2552,6 +2724,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
ino_t ino;
int tid;
unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
+ struct pid_namespace *ns;
task = get_proc_task(inode);
if (!task)
@@ -2585,16 +2758,17 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- tid = filp->f_version;
+ ns = filp->f_dentry->d_sb->s_fs_info;
+ tid = (int)filp->f_version;
filp->f_version = 0;
- for (task = first_tid(leader, tid, pos - 2);
+ for (task = first_tid(leader, tid, pos - 2, ns);
task;
task = next_tid(task), pos++) {
- tid = task->pid;
+ tid = task_pid_nr_ns(task, ns);
if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
/* returning this tgid failed, save it as the first
* pid for the next readir call */
- filp->f_version = tid;
+ filp->f_version = (u64)tid;
put_task_struct(task);
break;
}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b5e7155d30d..1bdb6243575 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -74,7 +74,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
nbytes = MAX_NON_LFS - pos;
dp = PDE(inode);
- if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+ if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
return -ENOMEM;
while ((nbytes > 0) && !eof) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 0e4d37c93ee..abe6a3f0436 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -107,7 +107,7 @@ static void proc_destroy_inode(struct inode *inode)
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
-static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
+static void init_once(struct kmem_cache * cachep, void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
@@ -119,10 +119,8 @@ int __init proc_init_inodecache(void)
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_PANIC),
init_once);
- if (proc_inode_cachep == NULL)
- return -ENOMEM;
return 0;
}
@@ -450,7 +448,7 @@ out_mod:
return NULL;
}
-int proc_fill_super(struct super_block *s, void *data, int silent)
+int proc_fill_super(struct super_block *s)
{
struct inode * root_inode;
diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c
index 25d2d9c6e32..8ae221dfd01 100644
--- a/fs/proc/mmu.c
+++ b/fs/proc/mmu.c
@@ -8,27 +8,10 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mman.h>
-#include <linux/proc_fs.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/seq_file.h>
-#include <linux/hugetlb.h>
+#include <linux/spinlock.h>
#include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
-#include <asm/tlb.h>
-#include <asm/div64.h>
#include "internal.h"
void get_vmalloc_info(struct vmalloc_info *vmi)
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index bee251cb87c..e0d064e9764 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -66,7 +66,6 @@ extern int get_stram_list(char *);
extern int get_filesystem_list(char *);
extern int get_exec_domain_list(char *);
extern int get_dma_list(char *);
-extern int get_locks_status (char *, char **, off_t, int);
static int proc_calc_metrics(char *page, char **start, off_t off,
int count, int *eof, int len)
@@ -92,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
LOAD_INT(a), LOAD_FRAC(a),
LOAD_INT(b), LOAD_FRAC(b),
LOAD_INT(c), LOAD_FRAC(c),
- nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
+ nr_running(), nr_threads,
+ task_active_pid_ns(current)->last_pid);
return proc_calc_metrics(page, start, off, count, eof, len);
}
@@ -230,6 +230,19 @@ static const struct file_operations fragmentation_file_operations = {
.release = seq_release,
};
+extern struct seq_operations pagetypeinfo_op;
+static int pagetypeinfo_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &pagetypeinfo_op);
+}
+
+static const struct file_operations pagetypeinfo_file_ops = {
+ .open = pagetypeinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
extern struct seq_operations zoneinfo_op;
static int zoneinfo_open(struct inode *inode, struct file *file)
{
@@ -443,6 +456,7 @@ static int show_stat(struct seq_file *p, void *v)
int i;
unsigned long jif;
cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+ cputime64_t guest;
u64 sum = 0;
struct timespec boottime;
unsigned int *per_irq_sum;
@@ -453,6 +467,7 @@ static int show_stat(struct seq_file *p, void *v)
user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
+ guest = cputime64_zero;
getboottime(&boottime);
jif = boottime.tv_sec;
@@ -467,6 +482,7 @@ static int show_stat(struct seq_file *p, void *v)
irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
+ guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
for (j = 0; j < NR_IRQS; j++) {
unsigned int temp = kstat_cpu(i).irqs[j];
sum += temp;
@@ -474,7 +490,7 @@ static int show_stat(struct seq_file *p, void *v)
}
}
- seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+ seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
(unsigned long long)cputime64_to_clock_t(user),
(unsigned long long)cputime64_to_clock_t(nice),
(unsigned long long)cputime64_to_clock_t(system),
@@ -482,7 +498,8 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(iowait),
(unsigned long long)cputime64_to_clock_t(irq),
(unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal));
+ (unsigned long long)cputime64_to_clock_t(steal),
+ (unsigned long long)cputime64_to_clock_t(guest));
for_each_online_cpu(i) {
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -494,7 +511,9 @@ static int show_stat(struct seq_file *p, void *v)
irq = kstat_cpu(i).cpustat.irq;
softirq = kstat_cpu(i).cpustat.softirq;
steal = kstat_cpu(i).cpustat.steal;
- seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+ guest = kstat_cpu(i).cpustat.guest;
+ seq_printf(p,
+ "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
i,
(unsigned long long)cputime64_to_clock_t(user),
(unsigned long long)cputime64_to_clock_t(nice),
@@ -503,15 +522,13 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(iowait),
(unsigned long long)cputime64_to_clock_t(irq),
(unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal));
+ (unsigned long long)cputime64_to_clock_t(steal),
+ (unsigned long long)cputime64_to_clock_t(guest));
}
seq_printf(p, "intr %llu", (unsigned long long)sum);
-#ifndef CONFIG_SMP
- /* Touches too many cache lines on SMP setups */
for (i = 0; i < NR_IRQS; i++)
seq_printf(p, " %u", per_irq_sum[i]);
-#endif
seq_printf(p,
"\nctxt %llu\n"
@@ -617,16 +634,18 @@ static int cmdline_read_proc(char *page, char **start, off_t off,
return proc_calc_metrics(page, start, off, count, eof, len);
}
-static int locks_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int locks_open(struct inode *inode, struct file *filp)
{
- int len = get_locks_status(page, start, off, count);
-
- if (len < count)
- *eof = 1;
- return len;
+ return seq_open(filp, &locks_seq_operations);
}
+static const struct file_operations proc_locks_operations = {
+ .open = locks_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static int execdomains_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
@@ -684,7 +703,6 @@ void __init proc_misc_init(void)
#endif
{"filesystems", filesystems_read_proc},
{"cmdline", cmdline_read_proc},
- {"locks", locks_read_proc},
{"execdomains", execdomains_read_proc},
{NULL,}
};
@@ -702,6 +720,7 @@ void __init proc_misc_init(void)
entry->proc_fops = &proc_kmsg_operations;
}
#endif
+ create_seq_entry("locks", 0, &proc_locks_operations);
create_seq_entry("devices", 0, &proc_devinfo_operations);
create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
#ifdef CONFIG_BLOCK
@@ -716,6 +735,7 @@ void __init proc_misc_init(void)
#endif
#endif
create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
+ create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops);
create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
#ifdef CONFIG_BLOCK
diff --git a/fs/proc/root.c b/fs/proc/root.c
index cf3046638b0..ec9cb3b6c93 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,32 +18,90 @@
#include <linux/bitops.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
+#include <linux/pid_namespace.h>
#include "internal.h"
struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
+static int proc_test_super(struct super_block *sb, void *data)
+{
+ return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+ struct pid_namespace *ns;
+
+ ns = (struct pid_namespace *)data;
+ sb->s_fs_info = get_pid_ns(ns);
+ return set_anon_super(sb, NULL);
+}
+
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
+ int err;
+ struct super_block *sb;
+ struct pid_namespace *ns;
+ struct proc_inode *ei;
+
if (proc_mnt) {
/* Seed the root directory with a pid so it doesn't need
* to be special in base.c. I would do this earlier but
* the only task alive when /proc is mounted the first time
* is the init_task and it doesn't have any pids.
*/
- struct proc_inode *ei;
ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
if (!ei->pid)
ei->pid = find_get_pid(1);
}
- return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
+
+ if (flags & MS_KERNMOUNT)
+ ns = (struct pid_namespace *)data;
+ else
+ ns = current->nsproxy->pid_ns;
+
+ sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
+
+ if (!sb->s_root) {
+ sb->s_flags = flags;
+ err = proc_fill_super(sb);
+ if (err) {
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ return err;
+ }
+
+ ei = PROC_I(sb->s_root->d_inode);
+ if (!ei->pid) {
+ rcu_read_lock();
+ ei->pid = get_pid(find_pid_ns(1, ns));
+ rcu_read_unlock();
+ }
+
+ sb->s_flags |= MS_ACTIVE;
+ ns->proc_mnt = mnt;
+ }
+
+ return simple_set_mnt(mnt, sb);
+}
+
+static void proc_kill_sb(struct super_block *sb)
+{
+ struct pid_namespace *ns;
+
+ ns = (struct pid_namespace *)sb->s_fs_info;
+ kill_anon_super(sb);
+ put_pid_ns(ns);
}
static struct file_system_type proc_fs_type = {
.name = "proc",
.get_sb = proc_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = proc_kill_sb,
};
void __init proc_root_init(void)
@@ -54,12 +112,13 @@ void __init proc_root_init(void)
err = register_filesystem(&proc_fs_type);
if (err)
return;
- proc_mnt = kern_mount(&proc_fs_type);
+ proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
err = PTR_ERR(proc_mnt);
if (IS_ERR(proc_mnt)) {
unregister_filesystem(&proc_fs_type);
return;
}
+
proc_misc_init();
proc_net_init();
@@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = {
.parent = &proc_root,
};
+int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+ struct vfsmount *mnt;
+
+ mnt = kern_mount_data(&proc_fs_type, ns);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ return 0;
+}
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+ mntput(ns->proc_mnt);
+}
+
EXPORT_SYMBOL(proc_symlink);
EXPORT_SYMBOL(proc_mkdir);
EXPORT_SYMBOL(create_proc_entry);