From e5ff5ce6e20ee22511398bb31fb912466cf82a36 Mon Sep 17 00:00:00 2001 From: "Michael Kerrisk (man-pages)" Date: Wed, 25 Jan 2017 14:03:36 +1300 Subject: nsfs: Add an ioctl() to return the namespace type Linux 4.9 added two ioctl() operations that can be used to discover: * the parental relationships for hierarchical namespaces (user and PID) [NS_GET_PARENT] * the user namespaces that owns a specified non-user-namespace [NS_GET_USERNS] For no good reason that I can glean, NS_GET_USERNS was made synonymous with NS_GET_PARENT for user namespaces. It might have been better if NS_GET_USERNS had returned an error if the supplied file descriptor referred to a user namespace, since it suggests that the caller may be confused. More particularly, if it had generated an error, then I wouldn't need the new ioctl() operation proposed here. (On the other hand, what I propose here may be more generally useful.) I would like to write code that discovers namespace relationships for the purpose of understanding the namespace setup on a running system. In particular, given a file descriptor (or pathname) for a namespace, N, I'd like to obtain the corresponding user namespace. Namespace N might be a user namespace (in which case my code would just use N) or a non-user namespace (in which case my code will use NS_GET_USERNS to get the user namespace associated with N). The problem is that there is no way to tell the difference by looking at the file descriptor (and if I try to use NS_GET_USERNS on an N that is a user namespace, I get the parent user namespace of N, which is not what I want). This patch therefore adds a new ioctl(), NS_GET_NSTYPE, which, given a file descriptor that refers to a user namespace, returns the namespace type (one of the CLONE_NEW* constants). Signed-off-by: Michael Kerrisk Signed-off-by: Eric W. Biederman --- include/uapi/linux/nsfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 3af617230d1b..2b48df11056a 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -9,5 +9,8 @@ #define NS_GET_USERNS _IO(NSIO, 0x1) /* Returns a file descriptor that refers to a parent namespace */ #define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Feb 2017 06:06:16 +1300 Subject: fs: Better permission checking for submounts To support unprivileged users mounting filesystems two permission checks have to be performed: a test to see if the user allowed to create a mount in the mount namespace, and a test to see if the user is allowed to access the specified filesystem. The automount case is special in that mounting the original filesystem grants permission to mount the sub-filesystems, to any user who happens to stumble across the their mountpoint and satisfies the ordinary filesystem permission checks. Attempting to handle the automount case by using override_creds almost works. It preserves the idea that permission to mount the original filesystem is permission to mount the sub-filesystem. Unfortunately using override_creds messes up the filesystems ordinary permission checks. Solve this by being explicit that a mount is a submount by introducing vfs_submount, and using it where appropriate. vfs_submount uses a new mount internal mount flags MS_SUBMOUNT, to let sget and friends know that a mount is a submount so they can take appropriate action. sget and sget_userns are modified to not perform any permission checks on submounts. follow_automount is modified to stop using override_creds as that has proven problemantic. do_mount is modified to always remove the new MS_SUBMOUNT flag so that we know userspace will never by able to specify it. autofs4 is modified to stop using current_real_cred that was put in there to handle the previous version of submount permission checking. cifs is modified to pass the mountpoint all of the way down to vfs_submount. debugfs is modified to pass the mountpoint all of the way down to trace_automount by adding a new parameter. To make this change easier a new typedef debugfs_automount_t is introduced to capture the type of the debugfs automount function. Cc: stable@vger.kernel.org Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using current_real_cred()->uid") Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds") Reviewed-by: Trond Myklebust Reviewed-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/afs/mntpt.c | 2 +- fs/autofs4/waitq.c | 4 ++-- fs/cifs/cifs_dfs_ref.c | 7 ++++--- fs/debugfs/inode.c | 8 ++++---- fs/namei.c | 3 --- fs/namespace.c | 17 ++++++++++++++++- fs/nfs/namespace.c | 2 +- fs/nfs/nfs4namespace.c | 2 +- fs/super.c | 13 ++++++++++--- include/linux/debugfs.h | 3 ++- include/linux/mount.h | 3 +++ include/uapi/linux/fs.h | 1 + kernel/trace/trace.c | 4 ++-- 13 files changed, 47 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 81dd075356b9..d4fb0afc0097 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* try and do the mount */ _debug("--- attempting mount %s -o %s ---", devname, options); - mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); + mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); _debug("--- mount result %p ---", mnt); free_page((unsigned long) devname); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 1278335ce366..79fbd85db4ba 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -436,8 +436,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, memcpy(&wq->name, &qstr, sizeof(struct qstr)); wq->dev = autofs4_get_dev(sbi); wq->ino = autofs4_get_ino(sbi); - wq->uid = current_real_cred()->uid; - wq->gid = current_real_cred()->gid; + wq->uid = current_cred()->uid; + wq->gid = current_cred()->gid; wq->pid = pid; wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index ec9dbbcca3b9..9156be545b0f 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -245,7 +245,8 @@ compose_mount_options_err: * @fullpath: full path in UNC format * @ref: server's referral */ -static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, +static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, + struct cifs_sb_info *cifs_sb, const char *fullpath, const struct dfs_info3_param *ref) { struct vfsmount *mnt; @@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, if (IS_ERR(mountdata)) return (struct vfsmount *)mountdata; - mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata); + mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); kfree(mountdata); kfree(devname); return mnt; @@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) mnt = ERR_PTR(-EINVAL); break; } - mnt = cifs_dfs_do_refmount(cifs_sb, + mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, full_path, referrals + i); cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", __func__, referrals[i].node_name, mnt); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f17fcf89e18e..1e30f74a9527 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -187,9 +187,9 @@ static const struct super_operations debugfs_super_operations = { static struct vfsmount *debugfs_automount(struct path *path) { - struct vfsmount *(*f)(void *); - f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata; - return f(d_inode(path->dentry)->i_private); + debugfs_automount_t f; + f = (debugfs_automount_t)path->dentry->d_fsdata; + return f(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); */ struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data) { struct dentry *dentry = start_creating(name, parent); diff --git a/fs/namei.c b/fs/namei.c index 6fa3e9138fe4..da689c9c005e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1100,7 +1100,6 @@ static int follow_automount(struct path *path, struct nameidata *nd, bool *need_mntput) { struct vfsmount *mnt; - const struct cred *old_cred; int err; if (!path->dentry->d_op || !path->dentry->d_op->d_automount) @@ -1129,9 +1128,7 @@ static int follow_automount(struct path *path, struct nameidata *nd, if (nd->total_link_count >= 40) return -ELOOP; - old_cred = override_creds(&init_cred); mnt = path->dentry->d_op->d_automount(path); - revert_creds(old_cred); if (IS_ERR(mnt)) { /* * The filesystem is allowed to return -EISDIR here to indicate diff --git a/fs/namespace.c b/fs/namespace.c index 487ba30bb5c6..089a6b23135a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -989,6 +989,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void } EXPORT_SYMBOL_GPL(vfs_kern_mount); +struct vfsmount * +vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, + const char *name, void *data) +{ + /* Until it is worked out how to pass the user namespace + * through from the parent mount to the submount don't support + * unprivileged mounts with submounts. + */ + if (mountpoint->d_sb->s_user_ns != &init_user_ns) + return ERR_PTR(-EPERM); + + return vfs_kern_mount(type, MS_SUBMOUNT, name, data); +} +EXPORT_SYMBOL_GPL(vfs_submount); + static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { @@ -2794,7 +2809,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | - MS_STRICTATIME | MS_NOREMOTELOCK); + MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5551e8ef67fd..e49d831c4e85 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { - return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); } /** diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index d21104912676..d8b040bd9814 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, mountdata->hostname, mountdata->mnt_path); - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); + mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); if (!IS_ERR(mnt)) break; } diff --git a/fs/super.c b/fs/super.c index 1709ed029a2c..4185844f7a12 100644 --- a/fs/super.c +++ b/fs/super.c @@ -469,7 +469,7 @@ struct super_block *sget_userns(struct file_system_type *type, struct super_block *old; int err; - if (!(flags & MS_KERNMOUNT) && + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !(type->fs_flags & FS_USERNS_MOUNT) && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -499,7 +499,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, flags, user_ns); + s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -540,8 +540,15 @@ struct super_block *sget(struct file_system_type *type, { struct user_namespace *user_ns = current_user_ns(); + /* We don't yet pass the user namespace of the parent + * mount through to here so always use &init_user_ns + * until that changes. + */ + if (flags & MS_SUBMOUNT) + user_ns = &init_user_ns; + /* Ensure the requestor has permissions over the target filesystem */ - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); return sget_userns(type, test, set, flags, user_ns, data); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 014cc564d1c4..233006be30aa 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -97,9 +97,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data); void debugfs_remove(struct dentry *dentry); diff --git a/include/linux/mount.h b/include/linux/mount.h index c6f55158d5e5..8e0352af06b7 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -90,6 +90,9 @@ struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); +extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, + struct file_system_type *type, + const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 36da93fbf188..048a85e9f017 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -132,6 +132,7 @@ struct inodes_stat_t { #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) #define MS_NOREMOTELOCK (1<<27) #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d7449783987a..310f0ea0d1a2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7503,7 +7503,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) ftrace_init_tracefs(tr, d_tracer); } -static struct vfsmount *trace_automount(void *ingore) +static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; struct file_system_type *type; @@ -7516,7 +7516,7 @@ static struct vfsmount *trace_automount(void *ingore) type = get_fs_type("tracefs"); if (!type) return NULL; - mnt = vfs_kern_mount(type, 0, "tracefs", NULL); + mnt = vfs_submount(mntpt, type, "tracefs", NULL); put_filesystem(type); if (IS_ERR(mnt)) return NULL; -- cgit v1.2.3 From d95fa3c76a66b6d76b1e109ea505c55e66360f3c Mon Sep 17 00:00:00 2001 From: "Michael Kerrisk (man-pages)" Date: Wed, 25 Jan 2017 14:04:15 +1300 Subject: nsfs: Add an ioctl() to return owner UID of a userns I'd like to write code that discovers the user namespace hierarchy on a running system, and also shows who owns the various user namespaces. Currently, there is no way of getting the owner UID of a user namespace. Therefore, this patch adds a new NS_GET_CREATOR_UID ioctl() that fetches the UID (as seen in the user namespace of the caller) of the creator of the user namespace referred to by the specified file descriptor. If the supplied file descriptor does not refer to a user namespace, the operation fails with the error EINVAL. If the owner UID does not have a mapping in the caller's user namespace return the overflow UID as that appears easier to deal with in practice in user-space applications. -- EWB Changed the handling of unmapped UIDs from -EOVERFLOW back to the overflow uid. Per conversation with Michael Kerrisk after examining his test code. Acked-by: Andrey Vagin Signed-off-by: Michael Kerrisk Signed-off-by: Eric W. Biederman --- fs/nsfs.c | 11 +++++++++++ include/uapi/linux/nsfs.h | 8 +++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/fs/nsfs.c b/fs/nsfs.c index 5d534763c662..1656843e87d2 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -7,6 +7,7 @@ #include #include #include +#include static struct vfsmount *nsfs_mnt; @@ -163,7 +164,10 @@ int open_related_ns(struct ns_common *ns, static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct user_namespace *user_ns; struct ns_common *ns = get_proc_ns(file_inode(filp)); + uid_t __user *argp; + uid_t uid; switch (ioctl) { case NS_GET_USERNS: @@ -174,6 +178,13 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, return open_related_ns(ns, ns->ops->get_parent); case NS_GET_NSTYPE: return ns->ops->type; + case NS_GET_OWNER_UID: + if (ns->ops->type != CLONE_NEWUSER) + return -EINVAL; + user_ns = container_of(ns, struct user_namespace, ns); + argp = (uid_t __user *) arg; + uid = from_kuid_munged(current_user_ns(), user_ns->owner); + return put_user(uid, argp); default: return -ENOTTY; } diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 2b48df11056a..1a3ca79f466b 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -6,11 +6,13 @@ #define NSIO 0xb7 /* Returns a file descriptor that refers to an owning user namespace */ -#define NS_GET_USERNS _IO(NSIO, 0x1) +#define NS_GET_USERNS _IO(NSIO, 0x1) /* Returns a file descriptor that refers to a parent namespace */ -#define NS_GET_PARENT _IO(NSIO, 0x2) +#define NS_GET_PARENT _IO(NSIO, 0x2) /* Returns the type of namespace (CLONE_NEW* value) referred to by file descriptor */ -#define NS_GET_NSTYPE _IO(NSIO, 0x3) +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3