From 734550921e9b7ab924a43aa3d0bd4239dac4fbf1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Jul 2008 21:22:20 -0400 Subject: [PATCH] beginning of sysctl cleanup - ctl_table_set New object: set of sysctls [currently - root and per-net-ns]. Contains: pointer to parent set, list of tables and "should I see this set?" method (->is_seen(set)). Current lists of tables are subsumed by that; net-ns contains such a beast. ->lookup() for ctl_table_root returns pointer to ctl_table_set instead of that to ->list of that ctl_table_set. [folded compile fixes by rdd for configs without sysctl] Signed-off-by: Al Viro --- kernel/sysctl.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 35a50db9b6c..8ee4a0619fb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -160,12 +160,13 @@ static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list), + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .header_list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), }; static struct ctl_table kern_table[]; @@ -1403,14 +1404,20 @@ void sysctl_head_finish(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + static struct list_head * lookup_header_list(struct ctl_table_root *root, struct nsproxy 
*namespaces) { - struct list_head *header_list; - header_list = &root->header_list; - if (root->lookup) - header_list = root->lookup(root, namespaces); - return header_list; + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; } struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, @@ -1720,7 +1727,6 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { - struct list_head *header_list; struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; @@ -1772,8 +1778,8 @@ struct ctl_table_header *__register_sysctl_paths( } #endif spin_lock(&sysctl_lock); - header_list = lookup_header_list(root, namespaces); - list_add_tail(&header->ctl_entry, header_list); + header->set = lookup_header_set(root, namespaces); + list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; @@ -1832,6 +1838,15 @@ void unregister_sysctl_table(struct ctl_table_header * header) kfree(header); } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { @@ -1848,6 +1863,12 @@ void unregister_sysctl_table(struct ctl_table_header * table) { } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + #endif /* CONFIG_SYSCTL */ /* -- cgit v1.2.3 From f7e6ced4061da509f737541ca4dbd44d83a6e82f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 01:44:23 -0400 Subject: [PATCH] allow delayed freeing of ctl_table_header Refcount the sucker; instead of freeing it by the end of unregistration just drop the refcount and free only when it hits zero. 
Make sure that we _always_ make ->unregistering non-NULL in start_unregistering(). That allows anybody to get a reference to such puppy, preventing its freeing and reuse. It does *not* block unregistration. Anybody who holds such a reference can * try to grab a "use" reference (sysctl_head_grab()); that will succeed if and only if it hadn't entered unregistration yet. If it succeeds, we can use it in all normal ways until we release the "use" reference (with sysctl_head_finish()). Note that this relies on having ->unregistering become non-NULL in all cases when one starts to unregister the sucker. * keep pointers to ctl_table entries; they *can* be freed if the entire thing is unregistered. However, if sysctl_head_grab() succeeds, we know that unregistration had not happened (and will not happen until sysctl_head_finish()) and such pointers can be used safely. IOW, now we can have inodes under /proc/sys keep references to ctl_table entries, protecting them with references to ctl_table_header and grabbing the latter for the duration of operations that require access to ctl_table. That won't cause deadlocks, since unregistration will not be stopped by merely keeping a reference to ctl_table_header. 
Signed-off-by: Al Viro --- include/linux/sysctl.h | 6 ++++++ kernel/sysctl.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c1e0cf408af..956264d09ba 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -957,6 +957,11 @@ extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); +struct ctl_table_header; + +extern void sysctl_head_get(struct ctl_table_header *); +extern void sysctl_head_put(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); @@ -1073,6 +1078,7 @@ struct ctl_table_header struct ctl_table *ctl_table; struct list_head ctl_entry; int used; + int count; struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8ee4a0619fb..60d9357e717 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1387,6 +1387,9 @@ static void start_unregistering(struct ctl_table_header *p) spin_unlock(&sysctl_lock); wait_for_completion(&wait); spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); } /* * do not remove from the list until nobody holds it; walking the @@ -1395,6 +1398,32 @@ static void start_unregistering(struct ctl_table_header *p) list_del_init(&p->ctl_entry); } +void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree(head); + spin_unlock(&sysctl_lock); 
+} + +struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + void sysctl_head_finish(struct ctl_table_header *head) { if (!head) @@ -1771,6 +1800,7 @@ struct ctl_table_header *__register_sysctl_paths( header->unregistering = NULL; header->root = root; sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; #ifdef CONFIG_SYSCTL_SYSCALL_CHECK if (sysctl_check_table(namespaces, header->ctl_table)) { kfree(header); @@ -1834,8 +1864,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->count) + kfree(header); spin_unlock(&sysctl_lock); - kfree(header); } void setup_sysctl_set(struct ctl_table_set *p, @@ -1869,6 +1900,10 @@ void setup_sysctl_set(struct ctl_table_set *p, { } +void sysctl_head_put(struct ctl_table_header *head) +{ +} + #endif /* CONFIG_SYSCTL */ /* -- cgit v1.2.3 From ae7edecc9b8810770a8e5cb9a466ea4bdcfa8401 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 06:33:31 -0400 Subject: [PATCH] sysctl: keep track of tree relationships In a sense, that's the heart of the series. It's based on the following property of the trees we are actually asked to add: they can be split into stem that is already covered by registered trees and crown that is entirely new. IOW, if a/b and a/c/d are introduced by our tree, then a/c is also introduced by it. That allows to associate tree and table entry with each node in the union; while directory nodes might be covered by many trees, only one will cover the node by its crown. And that will allow much saner logics for /proc/sys in the next patches. This patch introduces the data structures needed to keep track of that. When adding a sysctl table, we find a "parent" one. 
Which is to say, find the deepest node on its stem that already is present in one of the tables from our table set or its ancestor sets. That table will be our parent and that node in it - attachment point. Add our table to list anchored in parent, have it refer the parent and contents of attachment point. Also remember where its crown lives. Signed-off-by: Al Viro --- include/linux/sysctl.h | 3 +++ kernel/sysctl.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 956264d09ba..3f6599aeb0d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1083,6 +1083,9 @@ struct ctl_table_header struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; + struct ctl_table *attached_by; + struct ctl_table *attached_to; + struct ctl_table_header *parent; }; /* struct ctl_path describes where in the hierarchy a table is added */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 60d9357e717..c9a0af88703 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1680,6 +1680,52 @@ static __init int sysctl_init(void) core_initcall(sysctl_init); +static int is_branch_in(struct ctl_table *branch, struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return 0; + + /* ... 
and nothing else */ + if (branch[1].procname || branch[1].ctl_name) + return 0; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname || p->ctl_name; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return 1; + } + return 0; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while (is_branch_in(by, to)) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = to->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + /** * __register_sysctl_paths - register a sysctl hierarchy * @root: List of sysctl headers to register on @@ -1759,6 +1805,7 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; + struct ctl_table_set *set; /* Count the path components */ for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) @@ -1809,6 +1856,18 @@ struct ctl_table_header *__register_sysctl_paths( #endif spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); @@ -1864,6 +1923,10 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->parent->count) 
{ + WARN_ON(1); + kfree(header->parent); + } if (!--header->count) kfree(header); spin_unlock(&sysctl_lock); -- cgit v1.2.3 From 9043476f726802f4b00c96d0c4f418dde48d1304 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 08:54:06 -0400 Subject: [PATCH] sanitize proc_sysctl * keep references to ctl_table_head and ctl_table in /proc/sys inodes * grab the former during operations, use the latter for access to entry if that succeeds * have ->d_compare() check if table should be seen for one who does lookup; that allows us to avoid flipping inodes - if we have the same name resolve to different things, we'll just keep several dentries and ->d_compare() will reject the wrong ones. * have ->lookup() and ->readdir() scan the table of our inode first, then walk all ctl_table_header and scan ->attached_by for those that are attached to our directory. * implement ->getattr(). * get rid of insane amounts of tree-walking * get rid of the need to know dentry in ->permission() and of the contortions induced by that. 
Signed-off-by: Al Viro --- fs/proc/inode.c | 5 + fs/proc/proc_sysctl.c | 427 ++++++++++++++++++++++-------------------------- include/linux/proc_fs.h | 5 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 15 ++ 5 files changed, 218 insertions(+), 235 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b37f25dc45a..8bb03f056c2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode) module_put(de->owner); de_put(de); } + if (PROC_I(inode)->sysctl) + sysctl_head_put(PROC_I(inode)->sysctl); clear_inode(inode); } @@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; + ei->sysctl = NULL; + ei->sysctl_entry = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5acc001d49f..fa1ec2433e4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -10,149 +10,110 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; +static const struct file_operations proc_sys_dir_file_operations; +static const struct inode_operations proc_sys_dir_operations; -static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) -{ - /* Refresh the cached information bits in the inode */ - if (table) { - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mode = table->mode; - if (table->proc_handler) { - inode->i_mode |= S_IFREG; - inode->i_nlink = 1; - } else { - inode->i_mode |= S_IFDIR; - inode->i_nlink = 0; /* It is too hard to figure out */ - } - } -} - -static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) +static struct inode 
*proc_sys_make_inode(struct super_block *sb, + struct ctl_table_header *head, struct ctl_table *table) { struct inode *inode; - struct proc_inode *dir_ei, *ei; - int depth; + struct proc_inode *ei; - inode = new_inode(dir->i_sb); + inode = new_inode(sb); if (!inode) goto out; - /* A directory is always one deeper than it's parent */ - dir_ei = PROC_I(dir); - depth = dir_ei->fd + 1; - + sysctl_head_get(head); ei = PROC_I(inode); - ei->fd = depth; + ei->sysctl = head; + ei->sysctl_entry = table; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &proc_sys_inode_operations; - inode->i_fop = &proc_sys_file_operations; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ - proc_sys_refresh_inode(inode, table); + inode->i_mode = table->mode; + if (!table->child) { + inode->i_mode |= S_IFREG; + inode->i_op = &proc_sys_inode_operations; + inode->i_fop = &proc_sys_file_operations; + } else { + inode->i_mode |= S_IFDIR; + inode->i_nlink = 0; + inode->i_op = &proc_sys_dir_operations; + inode->i_fop = &proc_sys_dir_file_operations; + } out: return inode; } -static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) -{ - for (;;) { - struct proc_inode *ei; - - ei = PROC_I(dentry->d_inode); - if (ei->fd == depth) - break; /* found */ - - dentry = dentry->d_parent; - } - return dentry; -} - -static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, - struct qstr *name) +static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { int len; - for ( ; table->ctl_name || table->procname; table++) { + for ( ; p->ctl_name || p->procname; p++) { - if (!table->procname) + if (!p->procname) continue; - len = strlen(table->procname); + len = strlen(p->procname); if (len != name->len) continue; - if (memcmp(table->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, len) != 0) continue; /* I have a match */ - return table; + return p; } return NULL; } -static struct 
ctl_table *proc_sys_lookup_table(struct dentry *dentry, - struct ctl_table *table) +struct ctl_table_header *grab_header(struct inode *inode) { - struct dentry *ancestor; - struct proc_inode *ei; - int depth, i; + if (PROC_I(inode)->sysctl) + return sysctl_head_grab(PROC_I(inode)->sysctl); + else + return sysctl_head_next(NULL); +} - ei = PROC_I(dentry->d_inode); - depth = ei->fd; +static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct ctl_table_header *head = grab_header(dir); + struct ctl_table *table = PROC_I(dir)->sysctl_entry; + struct ctl_table_header *h = NULL; + struct qstr *name = &dentry->d_name; + struct ctl_table *p; + struct inode *inode; + struct dentry *err = ERR_PTR(-ENOENT); - if (depth == 0) - return table; + if (IS_ERR(head)) + return ERR_CAST(head); - for (i = 1; table && (i <= depth); i++) { - ancestor = proc_sys_ancestor(dentry, i); - table = proc_sys_lookup_table_one(table, &ancestor->d_name); - if (table) - table = table->child; + if (table && !table->child) { + WARN_ON(1); + goto out; } - return table; - -} -static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, - struct qstr *name, - struct ctl_table *table) -{ - table = proc_sys_lookup_table(dparent, table); - if (table) - table = proc_sys_lookup_table_one(table, name); - return table; -} -static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, - struct qstr *name, - struct ctl_table_header **ptr) -{ - struct ctl_table_header *head; - struct ctl_table *table = NULL; + table = table ? 
table->child : head->ctl_table; - for (head = sysctl_head_next(NULL); head; - head = sysctl_head_next(head)) { - table = proc_sys_lookup_entry(parent, name, head->ctl_table); - if (table) - break; + p = find_in_table(table, name); + if (!p) { + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) + continue; + p = find_in_table(h->attached_by, name); + if (p) + break; + } } - *ptr = head; - return table; -} - -static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct ctl_table_header *head; - struct inode *inode; - struct dentry *err; - struct ctl_table *table; - err = ERR_PTR(-ENOENT); - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - if (!table) + if (!p) goto out; err = ERR_PTR(-ENOMEM); - inode = proc_sys_make_inode(dir, table); + inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); + if (h) + sysctl_head_finish(h); + if (!inode) goto out; @@ -168,22 +129,14 @@ out: static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, size_t count, loff_t *ppos, int write) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; + struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; ssize_t error; size_t res; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered @@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (sysctl_perm(head->root, table, write ? 
MAY_WRITE : MAY_READ)) goto out; + /* if that can happen at all, it should be -EINVAL, not -EISDIR */ + error = -EINVAL; + if (!table->proc_handler) + goto out; + /* careful: calling conventions are nasty here */ res = count; error = table->proc_handler(table, write, filp, buf, &res, ppos); @@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct ctl_table *table) + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) { - struct ctl_table_header *head; - struct ctl_table *child_table = NULL; struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; - int ret; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(qname.name, qname.len); - /* Suppress duplicates. - * Only fill a directory entry if it is the value that - * an ordinary lookup of that name returns. Hide all - * others. - * - * If we ever cache this translation in the dcache - * I should do a dcache lookup first. But for now - * it is just simpler not to. 
- */ - ret = 0; - child_table = do_proc_sys_lookup(dir, &qname, &head); - sysctl_head_finish(head); - if (child_table != table) - return 0; - child = d_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - inode = proc_sys_make_inode(dir->d_inode, table); - if (!inode) - child = ERR_PTR(-ENOMEM); - else { - new->d_op = &proc_sys_dentry_operations; - d_add(new, inode); + child = d_alloc(dir, &qname); + if (child) { + inode = proc_sys_make_inode(dir->d_sb, head, table); + if (!inode) { + dput(child); + return -ENOMEM; + } else { + child->d_op = &proc_sys_dentry_operations; + d_add(child, inode); } - if (child) - dput(new); - else - child = new; + } else { + return -ENOMEM; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); -end_instantiate: - if (!ino) - ino= find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); + return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); +} + +static int scan(struct ctl_table_header *head, ctl_table *table, + unsigned long *pos, struct file *file, + void *dirent, filldir_t filldir) +{ + + for (; table->ctl_name || table->procname; table++, (*pos)++) { + int res; + + /* Can't do anything without a proc name */ + if (!table->procname) + continue; + + if (*pos < file->f_pos) + continue; + + res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (res) + return res; + + file->f_pos = *pos + 1; + } + return 0; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; + struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - struct ctl_table_header *head = NULL; - struct ctl_table *table; + struct ctl_table_header 
*head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *h = NULL; unsigned long pos; - int ret; + int ret = -EINVAL; + + if (IS_ERR(head)) + return PTR_ERR(head); - ret = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) + if (table && !table->child) { + WARN_ON(1); goto out; + } + + table = table ? table->child : head->ctl_table; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - /* - Find each instance of the directory - * - Read all entries in each instance - * - Before returning an entry to user space lookup the entry - * by name and if I find a different entry don't return - * this one because it means it is a buried dup. - * For sysctl this should only happen for directory entries. - */ - for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { - table = proc_sys_lookup_table(dentry, head->ctl_table); + ret = scan(head, table, &pos, filp, dirent, filldir); + if (ret) + goto out; - if (!table) + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) continue; - - for (; table->ctl_name || table->procname; table++, pos++) { - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - - if (pos < filp->f_pos) - continue; - - if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) - goto out; - filp->f_pos = pos + 1; + ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + if (ret) { + sysctl_head_finish(h); + break; } } ret = 1; @@ -349,47 +298,18 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. 
*/ - struct ctl_table_header *head; - struct ctl_table *table; - struct dentry *dentry; - int mode; - int depth; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; int error; - head = NULL; - depth = PROC_I(inode)->fd; - - /* First check the cached permissions, in case we don't have - * enough information to lookup the sysctl table entry. - */ - error = -EACCES; - mode = inode->i_mode; - - if (current->euid == 0) - mode >>= 6; - else if (in_group_p(0)) - mode >>= 3; - - if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) - error = 0; - - /* If we can't get a sysctl table entry the permission - * checks on the cached mode will have to be enough. - */ - if (!nd || !depth) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); - dentry = nd->path.dentry; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + if (!table) /* global root - r-xr-xr-x */ + error = mask & MAY_WRITE ? -EACCES : 0; + else /* Use the permissions on the sysctl table entry */ + error = sysctl_perm(head->root, table, mask); - /* If the entry does not exist deny permission */ - error = -EACCES; - if (!table) - goto out; - - /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(head->root, table, mask); -out: sysctl_head_finish(head); return error; } @@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return error; } -/* I'm lazy and don't distinguish between files and directories, - * until access time. 
- */ +static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + + if (IS_ERR(head)) + return PTR_ERR(head); + + generic_fillattr(inode, stat); + if (table) + stat->mode = (stat->mode & S_IFMT) | table->mode; + + sysctl_head_finish(head); + return 0; +} + static const struct file_operations proc_sys_file_operations = { .read = proc_sys_read, .write = proc_sys_write, +}; + +static const struct file_operations proc_sys_dir_file_operations = { .readdir = proc_sys_readdir, }; static const struct inode_operations proc_sys_inode_operations = { + .permission = proc_sys_permission, + .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, +}; + +static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct ctl_table_header *head; - struct ctl_table *table; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - proc_sys_refresh_inode(dentry->d_inode, table); - sysctl_head_finish(head); - return !!table; + return !PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_delete(struct dentry *dentry) +{ + return !!PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, + struct qstr *name) +{ + struct dentry *dentry = container_of(qstr, struct dentry, d_name); + if (qstr->len != name->len) + return 1; + if (memcmp(qstr->name, name->name, name->len)) + return 1; + return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); } static struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, + .d_delete = proc_sys_delete, + .d_compare = 
proc_sys_compare, }; static struct proc_dir_entry *proc_sys_root; @@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root; int proc_sys_init(void) { proc_sys_root = proc_mkdir("sys", NULL); - proc_sys_root->proc_iops = &proc_sys_inode_operations; - proc_sys_root->proc_fops = &proc_sys_file_operations; + proc_sys_root->proc_iops = &proc_sys_dir_operations; + proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return 0; } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index f560d1705af..fb61850d1cf 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -282,11 +282,16 @@ union proc_op { struct task_struct *task); }; +struct ctl_table_header; +struct ctl_table; + struct proc_inode { struct pid *pid; int fd; union proc_op op; struct proc_dir_entry *pde; + struct ctl_table_header *sysctl; + struct ctl_table *sysctl_entry; struct inode vfs_inode; }; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3f6599aeb0d..d0437f36921 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -961,6 +961,7 @@ struct ctl_table_header; extern void sysctl_head_get(struct ctl_table_header *); extern void sysctl_head_put(struct ctl_table_header *); +extern int sysctl_is_seen(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c9a0af88703..ff5abcca5dd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1932,6 +1932,21 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_unlock(&sysctl_lock); } +int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = 
set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) -- cgit v1.2.3 From e6305c43eda10ebfd2ad9e35d6e172ccc7bb3695 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 21:03:57 -0400 Subject: [PATCH] sanitize ->permission() prototype * kill nameidata * argument; map the 3 bits in ->flags anybody cares about to new MAY_... ones and pass with the mask. * kill redundant gfs2_iop_permission() * sanitize ecryptfs_permission() * fix remaining places where ->permission() instances might barf on new MAY_... found in mask. The obvious next target in that direction is permission(9) folded fix for nfs_permission() breakage from Miklos Szeredi Signed-off-by: Al Viro --- fs/afs/internal.h | 4 +--- fs/afs/security.c | 2 +- fs/bad_inode.c | 3 +-- fs/cifs/cifsfs.c | 2 +- fs/coda/dir.c | 4 +++- fs/coda/pioctl.c | 6 ++---- fs/ecryptfs/inode.c | 17 ++--------------- fs/ext2/acl.c | 2 +- fs/ext2/acl.h | 2 +- fs/ext3/acl.c | 2 +- fs/ext3/acl.h | 2 +- fs/ext4/acl.c | 2 +- fs/ext4/acl.h | 2 +- fs/fuse/dir.c | 6 +++--- fs/gfs2/ops_inode.c | 12 +++--------- fs/hfs/inode.c | 3 +-- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/jffs2/acl.c | 2 +- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 2 +- fs/jfs/jfs_acl.h | 2 +- fs/namei.c | 23 +++++++++++++++++------ fs/nfs/dir.c | 11 +++++------ fs/ocfs2/file.c | 2 +- fs/ocfs2/file.h | 3 +-- fs/proc/base.c | 3 +-- fs/proc/proc_sysctl.c | 2 +- fs/reiserfs/xattr.c | 2 +- fs/smbfs/file.c | 4 ++-- fs/xfs/linux-2.6/xfs_iops.c | 3 +-- include/linux/coda_linux.h | 2 +- include/linux/fs.h | 5 ++++- include/linux/nfs_fs.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- include/linux/shmem_fs.h | 2 +- kernel/sysctl.c | 10 +++++----- mm/shmem_acl.c | 2 +- 38 files changed, 74 insertions(+), 87 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7102824ba84..3cb6920ff30 
100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *); extern const struct inode_operations afs_dir_inode_operations; extern const struct file_operations afs_dir_file_operations; -extern int afs_permission(struct inode *, int, struct nameidata *); - /* * file.c */ @@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); -extern int afs_permission(struct inode *, int, struct nameidata *); +extern int afs_permission(struct inode *, int); /* * server.c diff --git a/fs/afs/security.c b/fs/afs/security.c index 3bcbeceba1b..3ef50437003 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask, struct nameidata *nd) +int afs_permission(struct inode *inode, int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t uninitialized_var(access); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f1c2ea8342f..5f1538c03b1 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int bad_inode_permission(struct inode *inode, int mask) { return -EIO; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe5f6809cba..1ec7076f7b2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) +static int cifs_permission(struct 
inode *inode, int mask) { struct cifs_sb_info *cifs_sb; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 3d2580e00a3..c5916228243 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -137,9 +137,11 @@ exit: } -int coda_permission(struct inode *inode, int mask, struct nameidata *nd) +int coda_permission(struct inode *inode, int mask) { int error = 0; + + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (!mask) return 0; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c21a1f552a6..c38a98974fb 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,8 +24,7 @@ #include /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd); +static int coda_ioctl_permission(struct inode *inode, int mask); static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data); @@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int coda_ioctl_permission(struct inode *inode, int mask) { return 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d755455e3bf..32f4228efcd 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -830,22 +830,9 @@ out: } static int -ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) +ecryptfs_permission(struct inode *inode, int mask) { - int rc; - - if (nd) { - struct vfsmount *vfsmnt_save = nd->path.mnt; - struct dentry *dentry_save = nd->path.dentry; - - nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry); - nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry); - rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); - nd->path.mnt = vfsmnt_save; - nd->path.dentry = dentry_save; - } else - rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); - return rc; + return permission(ecryptfs_inode_to_lower(inode), mask, NULL); } /** diff --git a/fs/ext2/acl.c 
b/fs/ext2/acl.c index e58669e1b87..ae8c4f850b2 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask) } int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) +ext2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext2_check_acl); } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 0bde85bafe3..b42cf578554 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size) #define EXT2_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext2_permission (struct inode *, int, struct nameidata *); +extern int ext2_permission (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index a754d184817..b60bb241880 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask) } int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +ext3_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext3_check_acl); } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 0d1e6279cbf..42da16b8cac 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size) #define EXT3_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int, struct nameidata *); +extern int ext3_permission (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 3c8dab880d9..c7d04e16544 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask) } int -ext4_permission(struct inode *inode, int mask, struct nameidata *nd) +ext4_permission(struct inode *inode, int mask) { return generic_permission(inode, 
mask, ext4_check_acl); } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 26a5c1abf14..cd2b855a07d 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size) #define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext4_permission (struct inode *, int, struct nameidata *); +extern int ext4_permission (struct inode *, int); extern int ext4_acl_chmod (struct inode *); extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 51d0035ff07..48a7934cb95 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -898,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask) return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); - inarg.mask = mask; + inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; @@ -927,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) +static int fuse_permission(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; @@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) exist. 
So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) { + } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1e252dfc529..4e982532f08 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask) return error; } -static int gfs2_iop_permission(struct inode *inode, int mask, - struct nameidata *nd) -{ - return gfs2_permission(inode, mask); -} - static int setattr_size(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) } const struct inode_operations gfs2_file_iops = { - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = { .rmdir = gfs2_rmdir, .mknod = gfs2_mknod, .rename = gfs2_rename, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = gfs2_readlink, .follow_link = gfs2_follow_link, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index dc4ec640e87..aa73f3fd5dd 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask, - struct nameidata *nd) 
+static int hfs_permission(struct inode *inode, int mask) { if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) return 0; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index cc3b5e24339..d4014e3044d 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) +static int hfsplus_permission(struct inode *inode, int mask) { /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, * open_exec has the same test, so it's still not executable, if a x bit diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 5222345ddcc..d6ecabf4d23 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) +int hostfs_permission(struct inode *ino, int desired) { char *name; int r = 0, w = 0, x = 0, err; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 4c80404a9ab..d98713777a1 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int jffs2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jffs2_check_acl); } diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 0bb7f003fd8..8ca058aed38 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { #define JFFS2_ACL_NOT_CACHED ((void *)-1) -extern int jffs2_permission(struct inode *, int, struct nameidata *); +extern int jffs2_permission(struct inode *, int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int 
jffs2_init_acl_post(struct inode *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 4d84bdc8829..d3e5c33665d 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int jfs_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jfs_check_acl); } diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 455fa429204..88475f10a38 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_permission(struct inode *, int, struct nameidata *); +int jfs_permission(struct inode *, int); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/namei.c b/fs/namei.c index 3b26a240ade..46af98ed136 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -185,6 +185,8 @@ int generic_permission(struct inode *inode, int mask, { umode_t mode = inode->i_mode; + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; + if (current->fsuid == inode->i_uid) mode >>= 6; else { @@ -203,7 +205,7 @@ int generic_permission(struct inode *inode, int mask, /* * If the DACs are ok we don't need any capability check. */ - if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) + if ((mask & ~mode) == 0) return 0; check_capabilities: @@ -228,7 +230,7 @@ int generic_permission(struct inode *inode, int mask, int permission(struct inode *inode, int mask, struct nameidata *nd) { - int retval, submask; + int retval; struct vfsmount *mnt = NULL; if (nd) @@ -261,9 +263,17 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) } /* Ordinary permission routines do not understand MAY_APPEND. 
*/ - submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) { - retval = inode->i_op->permission(inode, submask, nd); + int extra = 0; + if (nd) { + if (nd->flags & LOOKUP_ACCESS) + extra |= MAY_ACCESS; + if (nd->flags & LOOKUP_CHDIR) + extra |= MAY_CHDIR; + if (nd->flags & LOOKUP_OPEN) + extra |= MAY_OPEN; + } + retval = inode->i_op->permission(inode, mask | extra); if (!retval) { /* * Exec permission on a regular file is denied if none @@ -277,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) return -EACCES; } } else { - retval = generic_permission(inode, submask, NULL); + retval = generic_permission(inode, mask, NULL); } if (retval) return retval; @@ -286,7 +296,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; - return security_inode_permission(inode, mask, nd); + return security_inode_permission(inode, + mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd); } /** diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 28a238dab23..74f92b717f7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) return status; nfs_access_add_cache(inode, &cache); out: - if ((cache.mask & mask) == mask) + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int nfs_permission(struct inode *inode, int mask) { struct rpc_cred *cred; int res = 0; nfs_inc_stats(inode, NFSIOS_VFSACCESS); - if (mask == 0) + if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) goto out; /* Is this sys_access() ? 
*/ - if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) + if (mask & MAY_ACCESS) goto force_lookup; switch (inode->i_mode & S_IFMT) { @@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && nd != NULL - && (nd->flags & LOOKUP_OPEN)) + && (mask & MAY_OPEN)) goto out; break; case S_IFDIR: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e8514e8b6ce..be2dd95d3a1 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1176,7 +1176,7 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int ocfs2_permission(struct inode *inode, int mask) { int ret; diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 048ddcaf5c8..1e27b4d017e 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask, - struct nameidata *nd); +int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/proc/base.c b/fs/proc/base.c index 81bce6791bf..d82d800389f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1859,8 +1859,7 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). 
*/ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int proc_fd_permission(struct inode *inode, int mask) { int rv; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fa1ec2433e4..f9a8b892718 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -292,7 +292,7 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) +static int proc_sys_permission(struct inode *inode, int mask) { /* * sysctl entries that are not writeable, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d7c4935c103..bb3cb5b7cdb 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int reiserfs_permission(struct inode *inode, int mask) { /* * We don't do permission checks on the internal objects. diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 2294783320c..e4f8d51a555 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. 
*/ static int -smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) +smb_file_permission(struct inode *inode, int mask) { int mode = inode->i_mode; int error = 0; @@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) /* Look at user permissions */ mode >>= 6; - if ((mode & 7 & mask) != mask) + if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) error = -EACCES; return error; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2bf287ef548..5fc61c824bb 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -589,8 +589,7 @@ xfs_check_acl( STATIC int xfs_vn_permission( struct inode *inode, - int mask, - struct nameidata *nd) + int mask) { return generic_permission(inode, mask, xfs_check_acl); } diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 31b75311e2c..dcc228aa335 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask, struct nameidata *nd); +int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7721a2ac9c0..6c923c9b79b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -60,6 +60,9 @@ extern int dir_notify_enable; #define MAY_WRITE 2 #define MAY_READ 4 #define MAY_APPEND 8 +#define MAY_ACCESS 16 +#define MAY_CHDIR 32 +#define MAY_OPEN 64 #define FMODE_READ 1 #define FMODE_WRITE 2 @@ -1272,7 +1275,7 @@ struct inode_operations { void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry 
*, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); + int (*permission) (struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 29d26191873..f08f9ca602a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -332,7 +332,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int, struct nameidata *); +extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 66a96814d61..af135ae895d 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -55,7 +55,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); int reiserfs_xattr_init(struct super_block *sb, int mount_flags); -int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd); +int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_del(struct inode *, const char *); int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f2d12d5a21b..fd83f2584b1 100644 --- 
a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -43,7 +43,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) } #ifdef CONFIG_TMPFS_POSIX_ACL -int shmem_permission(struct inode *, int, struct nameidata *); +int shmem_permission(struct inode *, int); int shmem_acl_init(struct inode *, struct inode *); void shmem_acl_destroy_inode(struct inode *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff5abcca5dd..911d846f050 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1516,9 +1516,9 @@ static int do_sysctl_strategy(struct ctl_table_root *root, int op = 0, rc; if (oldval) - op |= 004; + op |= MAY_READ; if (newval) - op |= 002; + op |= MAY_WRITE; if (sysctl_perm(root, table, op)) return -EPERM; @@ -1560,7 +1560,7 @@ repeat: if (n == table->ctl_name) { int error; if (table->child) { - if (sysctl_perm(root, table, 001)) + if (sysctl_perm(root, table, MAY_EXEC)) return -EPERM; name++; nlen--; @@ -1635,7 +1635,7 @@ static int test_perm(int mode, int op) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; - if ((mode & op & 0007) == op) + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1645,7 +1645,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) int error; int mode; - error = security_sysctl(table, op); + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) return error; diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index f5664c5b9eb..8e5aadd7dcd 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c @@ -191,7 +191,7 @@ shmem_check_acl(struct inode *inode, int mask) * shmem_permission - permission() inode operation */ int -shmem_permission(struct inode *inode, int mask, struct nameidata *nd) +shmem_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, shmem_check_acl); } -- cgit v1.2.3 From bfbcf034798b2ca45338cee5049b5694b7ddc865 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jul 2008 06:31:22 +0100 
Subject: lost sysctl fix try_attach() should walk into the matching subdirectory, not the first one... Signed-off-by: Al Viro Tested-by: Valdis.Kletnieks@vt.edu Tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 911d846f050..fe471334727 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1680,43 +1680,45 @@ static __init int sysctl_init(void) core_initcall(sysctl_init); -static int is_branch_in(struct ctl_table *branch, struct ctl_table *table) +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) { struct ctl_table *p; const char *s = branch->procname; /* branch should have named subdirectory as its first element */ if (!s || !branch->child) - return 0; + return NULL; /* ... and nothing else */ if (branch[1].procname || branch[1].ctl_name) - return 0; + return NULL; /* table should contain subdirectory with the same name */ for (p = table; p->procname || p->ctl_name; p++) { if (!p->child) continue; if (p->procname && strcmp(p->procname, s) == 0) - return 1; + return p; } - return 0; + return NULL; } /* see if attaching q to p would be an improvement */ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) { struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; int is_better = 0; int not_in_parent = !p->attached_by; - while (is_branch_in(by, to)) { + while ((next = is_branch_in(by, to)) != NULL) { if (by == q->attached_by) is_better = 1; if (to == p->attached_by) not_in_parent = 1; by = by->child; - to = to->child; + to = next->child; } if (is_better && not_in_parent) { -- cgit v1.2.3