summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/ipc_sysctl.c36
-rw-r--r--ipc/mqueue.c129
-rw-r--r--ipc/sem.c69
-rw-r--r--ipc/shm.c117
4 files changed, 280 insertions, 71 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 56410faa4550..00fba2bab87d 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
+
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
+static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table ipc_table;
+
+ memcpy(&ipc_table, table, sizeof(ipc_table));
+ ipc_table.data = get_ipc(table);
+
+ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+}
+
+static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+ int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (err < 0)
+ return err;
+ if (ns->shm_rmid_forced)
+ shm_destroy_orphaned(ns);
+ return err;
+}
+
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
+#define proc_ipc_dointvec_minmax NULL
+#define proc_ipc_dointvec_minmax_orphans NULL
#define proc_ipc_callback_dointvec NULL
#define proc_ipcauto_dointvec_minmax NULL
#endif
@@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = {
.proc_handler = proc_ipc_dointvec,
},
{
+ .procname = "shm_rmid_forced",
+ .data = &init_ipc_ns.shm_rmid_forced,
+ .maxlen = sizeof(init_ipc_ns.shm_rmid_forced),
+ .mode = 0644,
+ .proc_handler = proc_ipc_dointvec_minmax_orphans,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 14fb6d67e6a3..2e0ecfcc881d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -113,72 +113,75 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
{
struct user_struct *u = current_user();
struct inode *inode;
+ int ret = -ENOMEM;
inode = new_inode(sb);
- if (inode) {
- inode->i_ino = get_next_ino();
- inode->i_mode = mode;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_mtime = inode->i_ctime = inode->i_atime =
- CURRENT_TIME;
+ if (!inode)
+ goto err;
- if (S_ISREG(mode)) {
- struct mqueue_inode_info *info;
- struct task_struct *p = current;
- unsigned long mq_bytes, mq_msg_tblsz;
-
- inode->i_fop = &mqueue_file_operations;
- inode->i_size = FILENT_SIZE;
- /* mqueue specific info */
- info = MQUEUE_I(inode);
- spin_lock_init(&info->lock);
- init_waitqueue_head(&info->wait_q);
- INIT_LIST_HEAD(&info->e_wait_q[0].list);
- INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->notify_owner = NULL;
- info->qsize = 0;
- info->user = NULL; /* set when all is ok */
- memset(&info->attr, 0, sizeof(info->attr));
- info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
- info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
- if (attr) {
- info->attr.mq_maxmsg = attr->mq_maxmsg;
- info->attr.mq_msgsize = attr->mq_msgsize;
- }
- mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
- info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
- if (!info->messages)
- goto out_inode;
-
- mq_bytes = (mq_msg_tblsz +
- (info->attr.mq_maxmsg * info->attr.mq_msgsize));
-
- spin_lock(&mq_lock);
- if (u->mq_bytes + mq_bytes < u->mq_bytes ||
- u->mq_bytes + mq_bytes >
- task_rlimit(p, RLIMIT_MSGQUEUE)) {
- spin_unlock(&mq_lock);
- /* mqueue_evict_inode() releases info->messages */
- goto out_inode;
- }
- u->mq_bytes += mq_bytes;
- spin_unlock(&mq_lock);
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+
+ if (S_ISREG(mode)) {
+ struct mqueue_inode_info *info;
+ struct task_struct *p = current;
+ unsigned long mq_bytes, mq_msg_tblsz;
+
+ inode->i_fop = &mqueue_file_operations;
+ inode->i_size = FILENT_SIZE;
+ /* mqueue specific info */
+ info = MQUEUE_I(inode);
+ spin_lock_init(&info->lock);
+ init_waitqueue_head(&info->wait_q);
+ INIT_LIST_HEAD(&info->e_wait_q[0].list);
+ INIT_LIST_HEAD(&info->e_wait_q[1].list);
+ info->notify_owner = NULL;
+ info->qsize = 0;
+ info->user = NULL; /* set when all is ok */
+ memset(&info->attr, 0, sizeof(info->attr));
+ info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
+ info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
+ if (attr) {
+ info->attr.mq_maxmsg = attr->mq_maxmsg;
+ info->attr.mq_msgsize = attr->mq_msgsize;
+ }
+ mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+ if (!info->messages)
+ goto out_inode;
- /* all is ok */
- info->user = get_uid(u);
- } else if (S_ISDIR(mode)) {
- inc_nlink(inode);
- /* Some things misbehave if size == 0 on a directory */
- inode->i_size = 2 * DIRENT_SIZE;
- inode->i_op = &mqueue_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ mq_bytes = (mq_msg_tblsz +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+ spin_lock(&mq_lock);
+ if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+ u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) {
+ spin_unlock(&mq_lock);
+ /* mqueue_evict_inode() releases info->messages */
+ ret = -EMFILE;
+ goto out_inode;
}
+ u->mq_bytes += mq_bytes;
+ spin_unlock(&mq_lock);
+
+ /* all is ok */
+ info->user = get_uid(u);
+ } else if (S_ISDIR(mode)) {
+ inc_nlink(inode);
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * DIRENT_SIZE;
+ inode->i_op = &mqueue_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
}
+
return inode;
out_inode:
iput(inode);
- return NULL;
+err:
+ return ERR_PTR(ret);
}
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
@@ -194,8 +197,8 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
NULL);
- if (!inode) {
- error = -ENOMEM;
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
goto out;
}
@@ -315,8 +318,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
spin_unlock(&mq_lock);
inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
- if (!inode) {
- error = -ENOMEM;
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
spin_lock(&mq_lock);
ipc_ns->mq_queues_count--;
goto out_unlock;
@@ -446,8 +449,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&info->lock);
- time = schedule_hrtimeout_range_clock(timeout,
- HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
+ time = schedule_hrtimeout_range_clock(timeout, 0,
+ HRTIMER_MODE_ABS, CLOCK_REALTIME);
while (ewp->state == STATE_PENDING)
cpu_relax();
diff --git a/ipc/sem.c b/ipc/sem.c
index 8b929e6a6eda..5215a81420df 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -90,6 +90,52 @@
#include <asm/uaccess.h>
#include "util.h"
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+ int semval; /* current value */
+ int sempid; /* pid of last operation */
+ struct list_head sem_pending; /* pending single-sop operations */
+};
+
+/* One queue for each sleeping process in the system. */
+struct sem_queue {
+ struct list_head simple_list; /* queue of pending operations */
+ struct list_head list; /* queue of pending operations */
+ struct task_struct *sleeper; /* this process */
+ struct sem_undo *undo; /* undo structure */
+ int pid; /* process id of requesting process */
+ int status; /* completion status of operation */
+ struct sembuf *sops; /* array of pending operations */
+ int nsops; /* number of operations */
+ int alter; /* does *sops alter the array? */
+};
+
+/* Each task has a list of undo requests. They are executed automatically
+ * when the process exits.
+ */
+struct sem_undo {
+ struct list_head list_proc; /* per-process list: *
+ * all undos from one process
+ * rcu protected */
+ struct rcu_head rcu; /* rcu struct for sem_undo */
+ struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
+ struct list_head list_id; /* per semaphore array list:
+ * all undos for one array */
+ int semid; /* semaphore set identifier */
+ short *semadj; /* array of adjustments */
+ /* one per semaphore */
+};
+
+/* sem_undo_list controls shared access to the list of sem_undo structures
+ * that may be shared among all a CLONE_SYSVSEM task group.
+ */
+struct sem_undo_list {
+ atomic_t refcnt;
+ spinlock_t lock;
+ struct list_head list_proc;
+};
+
+
#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
@@ -1426,6 +1472,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
queue.status = -EINTR;
queue.sleeper = current;
+
+sleep_again:
current->state = TASK_INTERRUPTIBLE;
sem_unlock(sma);
@@ -1450,15 +1498,23 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
}
sma = sem_lock(ns, semid);
+
+ /*
+ * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
+ */
+ error = get_queue_result(&queue);
+
+ /*
+ * Array removed? If yes, leave without sem_unlock().
+ */
if (IS_ERR(sma)) {
- error = -EIDRM;
goto out_free;
}
- error = get_queue_result(&queue);
/*
- * If queue.status != -EINTR we are woken up by another process
+ * If queue.status != -EINTR we are woken up by another process.
+ * Leave without unlink_queue(), but with sem_unlock().
*/
if (error != -EINTR) {
@@ -1470,6 +1526,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
+
+ /*
+ * If the wakeup was spurious, just retry
+ */
+ if (error == -EINTR && !signal_pending(current))
+ goto sleep_again;
+
unlink_queue(sma, &queue);
out_unlock_free:
diff --git a/ipc/shm.c b/ipc/shm.c
index 27884adb1a90..02ecf2c078fc 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
ns->shm_ctlmax = SHMMAX;
ns->shm_ctlall = SHMALL;
ns->shm_ctlmni = SHMMNI;
+ ns->shm_rmid_forced = 0;
ns->shm_tot = 0;
ipc_init_ids(&shm_ids(ns));
}
@@ -104,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns)
}
#endif
-void __init shm_init (void)
+static int __init ipc_ns_init(void)
{
shm_init_ns(&init_ipc_ns);
+ return 0;
+}
+
+pure_initcall(ipc_ns_init);
+
+void __init shm_init (void)
+{
ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
@@ -130,6 +138,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
return container_of(ipcp, struct shmid_kernel, shm_perm);
}
+static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
+{
+ rcu_read_lock();
+ spin_lock(&ipcp->shm_perm.lock);
+}
+
static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
int id)
{
@@ -187,6 +201,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
}
/*
+ * shm_may_destroy - identifies whether shm segment should be destroyed now
+ *
+ * Returns true if and only if there are no active users of the segment and
+ * one of the following is true:
+ *
+ * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
+ *
+ * 2) sysctl kernel.shm_rmid_forced is set to 1.
+ */
+static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+ return (shp->shm_nattch == 0) &&
+ (ns->shm_rmid_forced ||
+ (shp->shm_perm.mode & SHM_DEST));
+}
+
+/*
* remove the attach descriptor vma.
* free memory for segment if it is marked destroyed.
* The descriptor has already been removed from the current->mm->mmap list
@@ -206,14 +237,90 @@ static void shm_close(struct vm_area_struct *vma)
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
shp->shm_nattch--;
- if(shp->shm_nattch == 0 &&
- shp->shm_perm.mode & SHM_DEST)
+ if (shm_may_destroy(ns, shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
up_write(&shm_ids(ns).rw_mutex);
}
+/* Called with ns->shm_ids(ns).rw_mutex locked */
+static int shm_try_destroy_current(int id, void *p, void *data)
+{
+ struct ipc_namespace *ns = data;
+ struct kern_ipc_perm *ipcp = p;
+ struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
+ if (shp->shm_creator != current)
+ return 0;
+
+ /*
+ * Mark it as orphaned to destroy the segment when
+ * kernel.shm_rmid_forced is changed.
+ * It is noop if the following shm_may_destroy() returns true.
+ */
+ shp->shm_creator = NULL;
+
+ /*
+ * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
+ * is not set, it shouldn't be deleted here.
+ */
+ if (!ns->shm_rmid_forced)
+ return 0;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ return 0;
+}
+
+/* Called with ns->shm_ids(ns).rw_mutex locked */
+static int shm_try_destroy_orphaned(int id, void *p, void *data)
+{
+ struct ipc_namespace *ns = data;
+ struct kern_ipc_perm *ipcp = p;
+ struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
+ /*
+ * We want to destroy segments without users and with already
+ * exit'ed originating process.
+ *
+ * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+ */
+ if (shp->shm_creator != NULL)
+ return 0;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ return 0;
+}
+
+void shm_destroy_orphaned(struct ipc_namespace *ns)
+{
+ down_write(&shm_ids(ns).rw_mutex);
+ if (shm_ids(ns).in_use)
+ idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
+ up_write(&shm_ids(ns).rw_mutex);
+}
+
+
+void exit_shm(struct task_struct *task)
+{
+ struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+
+ if (shm_ids(ns).in_use == 0)
+ return;
+
+ /* Destroy all already created segments, but not mapped yet */
+ down_write(&shm_ids(ns).rw_mutex);
+ if (shm_ids(ns).in_use)
+ idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+ up_write(&shm_ids(ns).rw_mutex);
+}
+
static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct file *file = vma->vm_file;
@@ -404,6 +511,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_segsz = size;
shp->shm_nattch = 0;
shp->shm_file = file;
+ shp->shm_creator = current;
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
* proc-ps tools use this. Changing this will break them.
@@ -950,8 +1058,7 @@ out_nattch:
shp = shm_lock(ns, shmid);
BUG_ON(IS_ERR(shp));
shp->shm_nattch--;
- if(shp->shm_nattch == 0 &&
- shp->shm_perm.mode & SHM_DEST)
+ if (shm_may_destroy(ns, shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);