diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-13 16:46:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-13 16:46:18 -0700 |
commit | 16cefa8c3863721fd40445a1b34dea18cd16ccfe (patch) | |
tree | c8e58ca06e2edfd667d3e6062a642b80cc58e5e7 | |
parent | 4fbef206daead133085fe33905f5e842d38fb8da (diff) | |
parent | d8558f99fbc5ef5d4ae76b893784005056450f82 (diff) |
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (122 commits)
sunrpc: drop BKL around wrap and unwrap
NFSv4: Make sure unlock is really an unlock when cancelling a lock
NLM: fix source address of callback to client
SUNRPC client: add interface for binding to a local address
SUNRPC server: record the destination address of a request
SUNRPC: cleanup transport creation argument passing
NFSv4: Make the NFS state model work with the nosharedcache mount option
NFS: Error when mounting the same filesystem with different options
NFS: Add the mount option "nosharecache"
NFS: Add support for mounting NFSv4 file systems with string options
NFS: Add final pieces to support in-kernel mount option parsing
NFS: Introduce generic mount client API
NFS: Add enums and match tables for mount option parsing
NFS: Improve debugging output in NFS in-kernel mount client
NFS: Clean up in-kernel NFS mount
NFS: Remake nfsroot_mount as a permanent part of NFS client
SUNRPC: Add a convenient default for the hostname when calling rpc_create()
SUNRPC: Rename rpcb_getport to be consistent with new rpcb_getport_sync name
SUNRPC: Rename rpcb_getport_external routine
SUNRPC: Allow rpcbind requests to be interrupted by a signal.
...
56 files changed, 3321 insertions, 1810 deletions
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 96070bff93f..572601e98dc 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, */ static struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *sin, - int proto, int version, - const char *hostname, - int hostname_len) + int proto, int version, const char *hostname, + int hostname_len, const struct sockaddr_in *ssin) { struct hlist_head *chain; struct hlist_node *pos; @@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, struct nsm_handle *nsm = NULL; int hash; - dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", + dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT + ", p=%d, v=%d, my role=%s, name=%.*s)\n", + NIPQUAD(ssin->sin_addr.s_addr), NIPQUAD(sin->sin_addr.s_addr), proto, version, server? "server" : "client", hostname_len, @@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, continue; if (host->h_server != server) continue; + if (!nlm_cmp_addr(&host->h_saddr, ssin)) + continue; /* Move to head of hash chain. */ hlist_del(&host->h_hash); @@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, host->h_name = nsm->sm_name; host->h_addr = *sin; host->h_addr.sin_port = 0; /* ouch! */ + host->h_saddr = *ssin; host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; @@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host) */ nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } + clnt = host->h_rpcclnt; + if (clnt != NULL) + rpc_shutdown_client(clnt); kfree(host); } @@ -180,8 +178,10 @@ struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + return nlm_lookup_host(0, sin, proto, version, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -191,9 +191,12 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + + ssin.sin_addr = rqstp->rq_daddr.addr; return nlm_lookup_host(1, svc_addr_in(rqstp), rqstp->rq_prot, rqstp->rq_vers, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host) { struct rpc_clnt *clnt; - dprintk("lockd: nlm_bind_host(%08x)\n", - (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); + dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", + NIPQUAD(host->h_saddr.sin_addr), + NIPQUAD(host->h_addr.sin_addr)); /* Lock host handle */ mutex_lock(&host->h_mutex); @@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host) .protocol = host->h_proto, .address = (struct sockaddr *)&host->h_addr, .addrsize = sizeof(host->h_addr), + .saddress = (struct sockaddr *)&host->h_saddr, .timeout = &timeparms, .servername = host->h_name, .program = &nlm_program, diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2102e2d0134..3353ed8421a 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) status); else status = 0; + rpc_shutdown_client(clnt); out: return status; } @@ -138,7 +139,6 @@ nsm_create(void) .program = &nsm_program, .version = SM_VERSION, .authflavor = RPC_AUTH_NULL, - .flags = (RPC_CLNT_CREATE_ONESHOT), }; return rpc_create(&args); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 126b1bf02c0..26809325469 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp) /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); - /* kick rpciod */ - rpciod_up(); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); if (!nlm_timeout) @@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp) /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* release rpciod */ - rpciod_down(); - /* Release module */ unlock_kernel(); module_put_and_exit(0); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index f4580b44eef..b55cb236cf7 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ pagelist.o proc.o read.o symlink.o unlink.o \ - write.o namespace.o -nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o + write.o namespace.o mount_clnt.o +nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 881fa490092..ccb455053ee 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, int nfsversion) { struct nfs_client *clp; - int error; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto error_1; - } - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); - if (nfsversion == 4) { if (nfs_callback_up() < 0) goto error_2; @@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, #ifdef CONFIG_NFS_V4 init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); @@ -154,9 +143,6 @@ error_3: if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); error_2: - rpciod_down(); - __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); -error_1: kfree(clp); error_0: return NULL; @@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) #ifdef CONFIG_NFS_V4 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - while (!list_empty(&clp->cl_unused)) { - struct nfs4_state_owner *sp; - - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); + BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); #endif @@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp) if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); - if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); - kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f37d1bea83..20ac403469a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation) kfree(delegation); } +static void nfs_free_delegation_callback(struct rcu_head *head) +{ + struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); + + nfs_free_delegation(delegation); +} + static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; @@ -57,7 +64,7 @@ out_err: return status; } -static void nfs_delegation_claim_opens(struct inode *inode) +static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -72,9 +79,11 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx->dentry, state); + err = nfs4_open_delegation_recall(ctx, state, stateid); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); @@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; - /* Ensure we first revalidate the attributes and page cache! */ - if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); if (delegation == NULL) return -ENOMEM; @@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->inode = inode; spin_lock(&clp->cl_lock); - if (nfsi->delegation == NULL) { - list_add(&delegation->super_list, &clp->cl_delegations); - nfsi->delegation = delegation; + if (rcu_dereference(nfsi->delegation) == NULL) { + list_add_rcu(&delegation->super_list, &clp->cl_delegations); nfsi->delegation_state = delegation->type; + rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; } else { if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, @@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct status = -EIO; } } + + /* Ensure we revalidate the attributes and page cache! */ + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); + spin_unlock(&clp->cl_lock); kfree(delegation); return status; @@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * int res = 0; res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); - nfs_free_delegation(delegation); + call_rcu(&delegation->rcu, nfs_free_delegation_callback); return res; } @@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -int __nfs_inode_return_delegation(struct inode *inode) +static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; - int res = 0; nfs_msync_inode(inode); down_read(&clp->cl_sem); /* Guard against new delegated open calls */ down_write(&nfsi->rwsem); - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); - if (delegation != NULL) - res = nfs_do_return_delegation(inode, delegation); - return res; + return nfs_do_return_delegation(inode, delegation); +} + +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +{ + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + + if (delegation == NULL) + goto nomatch; + if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + goto nomatch; + list_del_rcu(&delegation->super_list); + nfsi->delegation_state = 0; + rcu_assign_pointer(nfsi->delegation, NULL); + return delegation; +nomatch: + return NULL; +} + +int nfs_inode_return_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int err = 0; + + if (rcu_dereference(nfsi->delegation) != NULL) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + err = __nfs_inode_return_delegation(inode, delegation); + } + return err; } /* @@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (delegation->inode->i_sb != sb) continue; inode = igrab(delegation->inode); if (inode == NULL) continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } static int nfs_do_expire_all_delegations(void *ptr) @@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr) allow_signal(SIGKILL); restart: - spin_lock(&clp->cl_lock); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } + rcu_read_unlock(); out: - spin_unlock(&clp->cl_lock); nfs_put_client(clp); module_put_and_exit(0); } @@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } struct recall_threadargs { @@ -316,21 +361,14 @@ static int recall_thread(void *data) down_read(&clp->cl_sem); down_write(&nfsi->rwsem); spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL && memcmp(delegation->stateid.data, - args->stateid->data, - sizeof(delegation->stateid.data)) == 0) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; + delegation = nfs_detach_delegation_locked(nfsi, args->stateid); + if (delegation != NULL) args->result = 0; - } else { - delegation = NULL; + else args->result = -ENOENT; - } spin_unlock(&clp->cl_lock); complete(&args->started); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, args->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); @@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs { struct nfs_delegation *delegation; struct inode *res = NULL; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { res = igrab(delegation->inode); break; } } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); return res; } @@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } /* @@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp) */ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { - struct nfs_delegation *delegation, *n; - LIST_HEAD(head); - spin_lock(&clp->cl_lock); - list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { + struct nfs_delegation *delegation; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) continue; - list_move(&delegation->super_list, &head); - NFS_I(delegation->inode)->delegation = NULL; - NFS_I(delegation->inode)->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - while(!list_empty(&head)) { - delegation = list_entry(head.next, struct nfs_delegation, super_list); - list_del(&delegation->super_list); - nfs_free_delegation(delegation); + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) + call_rcu(&delegation->rcu, nfs_free_delegation_callback); + goto restart; } + rcu_read_unlock(); } int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int res = 0; + int ret = 0; - if (nfsi->delegation_state == 0) - return 0; - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); if (delegation != NULL) { memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); - res = 1; + ret = 1; } - spin_unlock(&clp->cl_lock); - return res; + rcu_read_unlock(); + return ret; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2cfd4b24c7f..5874ce7fdba 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -22,11 +22,12 @@ struct nfs_delegation { long flags; loff_t maxsize; __u64 change_attr; + struct rcu_head rcu; }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int __nfs_inode_return_delegation(struct inode *inode); +int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); @@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { + struct nfs_delegation *delegation; + int ret = 0; + flags &= FMODE_READ|FMODE_WRITE; - smp_rmb(); - if ((NFS_I(inode)->delegation_state & flags) == flags) - return 1; - return 0; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && (delegation->type & flags) == flags) + ret = 1; + rcu_read_unlock(); + return ret; } -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - int err = 0; - - if (NFS_I(inode)->delegation != NULL) - err = __nfs_inode_return_delegation(inode); - return err; -} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c27258b5d3e..322141f4ab4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) - /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); + /* Revalidate fsid using the parent directory */ + return __nfs_revalidate_inode(server, dir); return 0; } @@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); + error = nfs_reval_fsid(dir, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && (nd->flags & LOOKUP_CREATE)) + if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; lock_kernel(); @@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym lock_kernel(); - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_HIGHUSER); if (!page) { unlock_kernel(); return -ENOMEM; @@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) struct nfs_inode *nfsi; struct nfs_access_entry *cache; - spin_lock(&nfs_access_lru_lock); restart: + spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -1770,6 +1769,7 @@ remove_lru_entry: clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); } spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); iput(inode); goto restart; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 00eee87510f..a5c82b6f3b4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int pgbase; int result; @@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); @@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; @@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); @@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; - - if (count < 0) goto out; + retval = -EFAULT; if (!access_ok(VERIFY_WRITE, buf, count)) goto out; @@ -814,7 +822,7 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - ssize_t retval; + ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; /* XXX: temporary */ @@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; + goto out; retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd9f5a83659..3d9fccf4ef9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - atomic_set(&ctx->count, 1); - ctx->dentry = dget(dentry); - ctx->vfsmnt = mntget(mnt); + ctx->path.dentry = dget(dentry); + ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; + kref_init(&ctx->kref); } return ctx; } @@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + kref_get(&ctx->kref); return ctx; } -void put_nfs_open_context(struct nfs_open_context *ctx) +static void nfs_free_open_context(struct kref *kref) { - if (atomic_dec_and_test(&ctx->count)) { - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } - if (ctx->state != NULL) - nfs4_close_state(ctx->state, ctx->mode); - if (ctx->cred != NULL) - put_rpccred(ctx->cred); - dput(ctx->dentry); - mntput(ctx->vfsmnt); - kfree(ctx); + struct nfs_open_context *ctx = container_of(kref, + struct nfs_open_context, kref); + + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->path.dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); } + if (ctx->state != NULL) + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->path.dentry); + mntput(ctx->path.mnt); + kfree(ctx); +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + kref_put(&ctx->kref, nfs_free_open_context); } /* @@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_changed; server = NFS_SERVER(inode); - /* Update the fsid if and only if this is the root directory */ - if (inode == inode->i_sb->s_root->d_inode + /* Update the fsid? */ + if (S_ISDIR(inode->i_mode) && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) server->fsid = fattr->fsid; @@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid &= ~NFS_INO_INVALID_DATA; if (data_stable) invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); - if (!nfs_have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegation(inode, FMODE_READ) || + (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: @@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! */ nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); - /* Now clear out any remaining state */ - while (!list_empty(&nfsi->open_states)) { - struct nfs4_state *state; - - state = list_entry(nfsi->open_states.next, - struct nfs4_state, - inode_states); - dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", - __FUNCTION__, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - state); - BUG_ON(atomic_read(&state->count) != 1); - nfs4_close_state(state, state->state); - } } #endif @@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag struct nfs_inode *nfsi = (struct nfs_inode *) foo; inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ad2b40db1e6..76cf55d5710 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) /* * Calculate the number of 512byte blocks used. */ -static inline unsigned long nfs_calc_block_size(u64 tsize) +static inline blkcnt_t nfs_calc_block_size(u64 tsize) { - loff_t used = (tsize + 511) >> 9; + blkcnt_t used = (tsize + 511) >> 9; return (used > ULONG_MAX) ? ULONG_MAX : used; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca5a266a314..8afd9f7e7a9 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -1,7 +1,5 @@ /* - * linux/fs/nfs/mount_clnt.c - * - * MOUNT client to support NFSroot. + * In-kernel MOUNT protocol client * * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> */ @@ -18,33 +16,31 @@ #include <linux/nfs_fs.h> #ifdef RPC_DEBUG -# define NFSDBG_FACILITY NFSDBG_ROOT +# define NFSDBG_FACILITY NFSDBG_MOUNT #endif -/* -#define MOUNT_PROGRAM 100005 -#define MOUNT_VERSION 1 -#define MOUNT_MNT 1 -#define MOUNT_UMNT 3 - */ - -static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, - int, int); static struct rpc_program mnt_program; struct mnt_fhstatus { - unsigned int status; - struct nfs_fh * fh; + u32 status; + struct nfs_fh *fh; }; -/* - * Obtain an NFS file handle for the given host and path +/** + * nfs_mount - Obtain an NFS file handle for the given host and path + * @addr: pointer to server's address + * @len: size of server's address + * @hostname: name of server host, or NULL + * @path: pointer to string containing export path to mount + * @version: mount version to use for this request + * @protocol: transport protocol to use for thie request + * @fh: pointer to location to place returned file handle + * + * Uses default timeout parameters specified by underlying transport. */ -int -nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, - int version, int protocol) +int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, + int version, int protocol, struct nfs_fh *fh) { - struct rpc_clnt *mnt_clnt; struct mnt_fhstatus result = { .fh = fh }; @@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, .rpc_argp = path, .rpc_resp = &result, }; - char hostname[32]; + struct rpc_create_args args = { + .protocol = protocol, + .address = addr, + .addrsize = len, + .servername = hostname, + .program = &mnt_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_INTR, + }; + struct rpc_clnt *mnt_clnt; int status; - dprintk("NFS: nfs_mount(%08x:%s)\n", - (unsigned)ntohl(addr->sin_addr.s_addr), path); + dprintk("NFS: sending MNT request for %s:%s\n", + (hostname ? hostname : "server"), path); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); - mnt_clnt = mnt_create(hostname, addr, version, protocol); + mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) - return PTR_ERR(mnt_clnt); + goto out_clnt_err; if (version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; @@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); - return status < 0? status : (result.status? -EACCES : 0); -} + rpc_shutdown_client(mnt_clnt); -static struct rpc_clnt * -mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, - int protocol) -{ - struct rpc_create_args args = { - .protocol = protocol, - .address = (struct sockaddr *)srvaddr, - .addrsize = sizeof(*srvaddr), - .servername = hostname, - .program = &mnt_program, - .version = version, - .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_INTR), - }; + if (status < 0) + goto out_call_err; + if (result.status != 0) + goto out_mnt_err; + + dprintk("NFS: MNT request succeeded\n"); + status = 0; + +out: + return status; + +out_clnt_err: + status = PTR_ERR(mnt_clnt); + dprintk("NFS: failed to create RPC client, status=%d\n", status); + goto out; + +out_call_err: + dprintk("NFS: failed to start MNT request, status=%d\n", status); + goto out; - return rpc_create(&args); +out_mnt_err: + dprintk("NFS: MNT server returned result %d\n", result.status); + status = -EACCES; + goto out; } /* * XDR encode/decode functions for MOUNT */ -static int -xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) +static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, + const char *path) { p = xdr_encode_string(p, path); @@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) return 0; } -static int -xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) return 0; } -static int -xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) #define MNT_fhstatus_sz (1 + 8) #define MNT_fhstatus3_sz (1 + 16) -static struct rpc_procinfo mnt_procedures[] = { -[MNTPROC_MNT] = { - .p_proc = MNTPROC_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus_sz, - .p_statidx = MNTPROC_MNT, - .p_name = "MOUNT", +static struct rpc_procinfo mnt_procedures[] = { + [MNTPROC_MNT] = { + .p_proc = MNTPROC_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus_sz, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; static struct rpc_procinfo mnt3_procedures[] = { -[MOUNTPROC3_MNT] = { - .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus3_sz, - .p_statidx = MOUNTPROC3_MNT, - .p_name = "MOUNT", + [MOUNTPROC3_MNT] = { + .p_proc = MOUNTPROC3_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus3_sz, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; -static struct rpc_version mnt_version1 = { - .number = 1, - .nrprocs = 2, - .procs = mnt_procedures +static struct rpc_version mnt_version1 = { + .number = 1, + .nrprocs = 2, + .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { - .number = 3, - .nrprocs = 2, - .procs = mnt3_procedures +static struct rpc_version mnt_version3 = { + .number = 3, + .nrprocs = 2, + .procs = mnt3_procedures, }; -static struct rpc_version * mnt_version[] = { +static struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, &mnt_version3, }; -static struct rpc_stat mnt_stats; +static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cd3ca7b5d3d..7fcc78f2aa7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) static int nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 offset = (u32)args->offset; u32 count = args->count; @@ -380,7 +380,7 @@ static int nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) { struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) static int nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 45268d6def2..814d886b6aa 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -335,9 +335,7 @@ again: * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); - if (status == 0) - nfs_setattr_update_inode(dentry->d_inode, sattr); - nfs_refresh_inode(dentry->d_inode, &fattr); + nfs_post_op_update_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } if (status != 0) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b51df8eb9f0..b4647a22f34 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg static int nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) static int nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -643,7 +643,7 @@ static int nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, struct nfs3_getaclargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); @@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cf3a17eb5c0..6c028e734fe 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. */ struct nfs4_state_owner { - spinlock_t so_lock; - struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs_unique_id so_owner_id; struct nfs_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - atomic_t so_count; + struct nfs_server *so_server; + struct rb_node so_client_node; struct rpc_cred *so_cred; /* Associated cred */ + + spinlock_t so_lock; + atomic_t so_count; struct list_head so_states; struct list_head so_delegations; struct nfs_seqid_counter so_seqid; @@ -108,7 +115,7 @@ struct nfs4_lock_state { #define NFS_LOCK_INITIALIZED 1 int ls_flags; struct nfs_seqid_counter ls_seqid; - u32 ls_id; + struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; }; @@ -116,7 +123,10 @@ struct nfs4_lock_state { /* bits for nfs4_state->flags */ enum { LK_STATE_IN_USE, - NFS_DELEGATED_STATE, + NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ + NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ + NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ }; struct nfs4_state { @@ -130,11 +140,14 @@ struct nfs4_state { unsigned long flags; /* Do we hold any locks? */ spinlock_t state_lock; /* Protects the lock_states list */ - nfs4_stateid stateid; + seqlock_t seqlock; /* Protects the stateid/open_stateid */ + nfs4_stateid stateid; /* Current stateid: may be delegation */ + nfs4_stateid open_stateid; /* OPEN stateid */ - unsigned int n_rdonly; - unsigned int n_wronly; - unsigned int n_rdwr; + /* The following 3 fields are protected by owner->so_lock */ + unsigned int n_rdonly; /* Number of read-only references */ + unsigned int n_wronly; /* Number of write-only references */ + unsigned int n_rdwr; /* Number of read/write references */ int state; /* State on the server (R,W, or RW) */ atomic_t count; }; @@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1; #else -#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_close_state(a, b, c) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 648e0ac0f90..fee2da856c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) } struct nfs4_opendata { - atomic_t count; + struct kref kref; struct nfs_openargs o_arg; struct nfs_openres o_res; struct nfs_open_confirmargs c_arg; struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; - struct dentry *dentry; + struct path path; struct dentry *dir; struct nfs4_state_owner *owner; + struct nfs4_state *state; struct iattr attrs; unsigned long timestamp; + unsigned int rpc_done : 1; int rpc_status; int cancelled; }; -static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, + +static void nfs4_init_opendata_res(struct nfs4_opendata *p) +{ + p->o_res.f_attr = &p->f_attr; + p->o_res.dir_attr = &p->dir_attr; + p->o_res.server = p->o_arg.server; + nfs_fattr_init(&p->f_attr); + nfs_fattr_init(&p->dir_attr); +} + +static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, int flags, const struct iattr *attrs) { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dget_parent(path->dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *p; @@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - atomic_set(&p->count, 1); - p->dentry = dget(dentry); + p->path.mnt = mntget(path->mnt); + p->path.dentry = dget(path->dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags, p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_id; - p->o_arg.name = &dentry->d_name; + p->o_arg.id = sp->so_owner_id.id; + p->o_arg.name = &p->path.dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - p->o_res.f_attr = &p->f_attr; - p->o_res.dir_attr = &p->dir_attr; - p->o_res.server = server; - nfs_fattr_init(&p->f_attr); - nfs_fattr_init(&p->dir_attr); if (flags & O_EXCL) { u32 *s = (u32 *) p->o_arg.u.verifier.data; s[0] = jiffies; @@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; + nfs4_init_opendata_res(p); + kref_init(&p->kref); return p; err_free: kfree(p); @@ -282,27 +292,25 @@ err: return NULL; } -static void nfs4_opendata_free(struct nfs4_opendata *p) +static void nfs4_opendata_free(struct kref *kref) { - if (p != NULL && atomic_dec_and_test(&p->count)) { - nfs_free_seqid(p->o_arg.seqid); - nfs4_put_state_owner(p->owner); - dput(p->dir); - dput(p->dentry); - kfree(p); - } + struct nfs4_opendata *p = container_of(kref, + struct nfs4_opendata, kref); + + nfs_free_seqid(p->o_arg.seqid); + if (p->state != NULL) + nfs4_put_open_state(p->state); + nfs4_put_state_owner(p->owner); + dput(p->dir); + dput(p->path.dentry); + mntput(p->path.mnt); + kfree(p); } -/* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, - const struct rpc_call_ops *tk_ops, void *calldata) +static void nfs4_opendata_put(struct nfs4_opendata *p) { - struct rpc_task *task; - - if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) - return -ENOMEM; - rpc_execute(task); - return 0; + if (p != NULL) + kref_put(&p->kref, nfs4_opendata_free); } static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) @@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } -static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) +static int can_open_cached(struct nfs4_state *state, int mode) +{ + int ret = 0; + switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { + case FMODE_READ: + ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_WRITE: + ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_READ|FMODE_WRITE: + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + } + return ret; +} + +static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) +{ + if ((delegation->type & open_flags) != open_flags) + return 0; + if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) + return 0; + return 1; +} + +static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) { switch (open_flags) { case FMODE_WRITE: @@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_ case FMODE_READ|FMODE_WRITE: state->n_rdwr++; } + nfs4_state_set_mode_locked(state, state->state | open_flags); } -static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { - struct inode *inode = state->inode; + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); + memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + switch (open_flags) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); + } +} + +static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + write_seqlock(&state->seqlock); + nfs_set_open_stateid_locked(state, stateid, open_flags); + write_sequnlock(&state->seqlock); +} +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) +{ open_flags &= (FMODE_READ|FMODE_WRITE); - /* Protect against nfs4_find_state_byowner() */ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + if (deleg_stateid != NULL) { + memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + } + if (open_stateid != NULL) + nfs_set_open_stateid_locked(state, open_stateid, open_flags); + write_sequnlock(&state->seqlock); spin_lock(&state->owner->so_lock); - spin_lock(&inode->i_lock); - memcpy(&state->stateid, stateid, sizeof(state->stateid)); update_open_stateflags(state, open_flags); - nfs4_state_set_mode_locked(state, state->state | open_flags); - spin_unlock(&inode->i_lock); spin_unlock(&state->owner->so_lock); } +static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL || (delegation->type & open_flags) == open_flags) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + nfs_inode_return_delegation(inode); +} + +static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) +{ + struct nfs4_state *state = opendata->state; + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *delegation; + int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); + nfs4_stateid stateid; + int ret = -EAGAIN; + + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + for (;;) { + if (can_open_cached(state, open_mode)) { + spin_lock(&state->owner->so_lock); + if (can_open_cached(state, open_mode)) { + update_open_stateflags(state, open_mode); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + goto out_return_state; + } + spin_unlock(&state->owner->so_lock); + } + if (delegation == NULL) + break; + if (!can_open_delegated(delegation, open_mode)) + break; + /* Save the delegation */ + memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + rcu_read_unlock(); + lock_kernel(); + ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + unlock_kernel(); + if (ret != 0) + goto out; + ret = -EAGAIN; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + /* If no delegation, try a cached open */ + if (delegation == NULL) + continue; + /* Is the delegation still valid? */ + if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0) + continue; + rcu_read_unlock(); + update_open_stateid(state, NULL, &stateid, open_mode); + goto out_return_state; + } + rcu_read_unlock(); +out: + return ERR_PTR(ret); +out_return_state: + atomic_inc(&state->count); + return state; +} + static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { struct inode *inode; struct nfs4_state *state = NULL; + struct nfs_delegation *delegation; + nfs4_stateid *deleg_stateid = NULL; + int ret; - if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + if (!data->rpc_done) { + state = nfs4_try_open_cached(data); goto out; + } + + ret = -EAGAIN; + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + goto err; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + ret = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + goto err; + ret = -ENOMEM; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) - goto put_inode; - update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); -put_inode: + goto err_put_inode; + if (data->o_res.delegation_type != 0) { + int delegation_flags = 0; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); + if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + } + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) + deleg_stateid = &delegation->stateid; + update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); + rcu_read_unlock(); iput(inode); out: return state; +err_put_inode: + iput(inode); +err: + return ERR_PTR(ret); } static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) @@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * return ERR_PTR(-ENOENT); } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) { + struct nfs4_state *newstate; int ret; opendata->o_arg.open_flags = openflags; + memset(&opendata->o_res, 0, sizeof(opendata->o_res)); + memset(&opendata->c_res, 0, sizeof(opendata->c_res)); + nfs4_init_opendata_res(opendata); ret = _nfs4_proc_open(opendata); if (ret != 0) return ret; - memcpy(stateid->data, opendata->o_res.stateid.data, - sizeof(stateid->data)); + newstate = nfs4_opendata_to_nfs4_state(opendata); + if (IS_ERR(newstate)) + return PTR_ERR(newstate); + nfs4_close_state(&opendata->path, newstate, openflags); + *res = newstate; return 0; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - nfs4_stateid stateid; struct nfs4_state *newstate; - int mode = 0; - int delegation = 0; int ret; /* memory barrier prior to reading state->n_* */ + clear_bit(NFS_DELEGATED_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_READ|FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_wronly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_rdonly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); if (ret != 0) return ret; - mode |= FMODE_READ; + if (newstate != state) + return -ESTALE; } - clear_bit(NFS_DELEGATED_STATE, &state->flags); - if (mode == 0) - return 0; - if (opendata->o_res.delegation_type == 0) - opendata->o_res.delegation_type = delegation; - opendata->o_arg.open_flags |= mode; - newstate = nfs4_opendata_to_nfs4_state(opendata); - if (newstate != NULL) { - if (opendata->o_res.delegation_type != 0) { - struct nfs_inode *nfsi = NFS_I(newstate->inode); - int delegation_flags = 0; - if (nfsi->delegation) - delegation_flags = nfsi->delegation->flags; - if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) - nfs_inode_set_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - else - nfs_inode_reclaim_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - } - nfs4_close_state(newstate, opendata->o_arg.open_flags); + /* + * We may have performed cached opens for all three recoveries. + * Check if we need to update the current stateid. + */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && + memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + write_seqlock(&state->seqlock); + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + write_sequnlock(&state->seqlock); } - if (newstate != state) - return -ESTALE; return 0; } @@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * OPEN_RECLAIM: * reclaim state on the server after a reboot. */ -static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; + struct nfs_delegation *delegation; struct nfs4_opendata *opendata; int delegation_type = 0; int status; - if (delegation != NULL) { - if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - memcpy(&state->stateid, &delegation->stateid, - sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, &state->flags); - return 0; - } - delegation_type = delegation->type; - } - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; opendata->o_arg.fh = NFS_FH(state->inode); nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) + delegation_type = delegation->flags; + rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return status; } -static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_open_reclaim(sp, state, dentry); + err = _nfs4_do_open_reclaim(ctx, state); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); + ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_state_owner *sp = state->owner; struct nfs4_opendata *opendata; int ret; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, + memcpy(opendata->o_arg.u.delegation.data, stateid->data, sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); int err; do { - err = _nfs4_open_delegation_recall(dentry, state); + err = _nfs4_open_delegation_recall(ctx, state, stateid); switch (err) { case 0: return err; @@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) memcpy(data->o_res.stateid.data, data->c_res.stateid.data, sizeof(data->o_res.stateid.data)); renew_lease(data->o_res.server, data->timestamp); + data->rpc_done = 1; } - nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); + nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); } static void nfs4_open_confirm_release(void *calldata) @@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (!data->rpc_done) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_confirm_ops = { @@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. - */ - data->rpc_status = -ENOMEM; + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); if (IS_ERR(task)) return PTR_ERR(task); @@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) return; + /* + * Check if we still need to send an OPEN call, or if we can use + * a delegation instead. + */ + if (data->state != NULL) { + struct nfs_delegation *delegation; + + if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) + goto out_no_action; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + if (delegation != NULL && + (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { + rcu_read_unlock(); + goto out_no_action; + } + rcu_read_unlock(); + } /* Update sequence id. */ - data->o_arg.id = sp->so_id; + data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->timestamp = jiffies; rpc_call_setup(task, &msg, 0); + return; +out_no_action: + task->tk_action = NULL; + } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = -ENOTDIR; } renew_lease(data->o_res.server, data->timestamp); + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) + nfs_confirm_seqid(&data->owner->so_seqid, 0); } nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); + data->rpc_done = 1; } static void nfs4_open_release(void *calldata) @@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (data->rpc_status != 0 || !data->rpc_done) goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_ops = { @@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. - */ - data->rpc_status = -ENOMEM; + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; + data->cancelled = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); if (IS_ERR(task)) return PTR_ERR(task); @@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } else status = data->rpc_status; rpc_put_task(task); - if (status != 0) + if (status != 0 || !data->rpc_done) return status; if (o_arg->open_flags & O_CREAT) { @@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; @@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf mask |= MAY_READ; if (openflags & FMODE_WRITE) mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out; @@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) * reclaim state on the server after a network partition. * Assumes caller holds the appropriate lock */ -static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct inode *inode = state->inode; - struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs4_opendata *opendata; - int openflags = state->state & (FMODE_READ|FMODE_WRITE); int ret; - if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - ret = _nfs4_do_access(inode, sp->so_cred, openflags); - if (ret < 0) - return ret; - memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, &state->flags); - return 0; - } - opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) { /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(sp); - d_drop(dentry); + nfs4_drop_state_owner(state->owner); + d_drop(ctx->path.dentry); } - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_open_expired(sp, state, dentry); + err = _nfs4_open_expired(ctx, state); if (err == -NFS4ERR_DELAY) nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_expired(sp, state, ctx->dentry); + ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; } /* - * Returns a referenced nfs4_state if there is an open delegation on the file + * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* + * fields corresponding to attributes that were used to store the verifier. + * Make sure we clobber those fields in the later setattr call */ -static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) -{ - struct nfs_delegation *delegation; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs4_state_owner *sp = NULL; - struct nfs4_state *state = NULL; - int open_flags = flags & (FMODE_READ|FMODE_WRITE); - int err; - - err = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { - dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); - return err; - } - err = nfs4_recover_expired_lease(server); - if (err != 0) - goto out_put_state_owner; - /* Protect against reboot recovery - NOTE ORDER! */ - down_read(&clp->cl_sem); - /* Protect against delegation recall */ - down_read(&nfsi->rwsem); - delegation = NFS_I(inode)->delegation; - err = -ENOENT; - if (delegation == NULL || (delegation->type & open_flags) != open_flags) - goto out_err; - err = -ENOMEM; - state = nfs4_get_open_state(inode, sp); - if (state == NULL) - goto out_err; - - err = -ENOENT; - if ((state->state & open_flags) == open_flags) { - spin_lock(&inode->i_lock); - update_open_stateflags(state, open_flags); - spin_unlock(&inode->i_lock); - goto out_ok; - } else if (state->state != 0) - goto out_put_open_state; - - lock_kernel(); - err = _nfs4_do_access(inode, cred, open_flags); - unlock_kernel(); - if (err != 0) - goto out_put_open_state; - set_bit(NFS_DELEGATED_STATE, &state->flags); - update_open_stateid(state, &delegation->stateid, open_flags); -out_ok: - nfs4_put_state_owner(sp); - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - *res = state; - return 0; -out_put_open_state: - nfs4_put_open_state(state); -out_err: - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - if (err != -EACCES) - nfs_inode_return_delegation(inode); -out_put_state_owner: - nfs4_put_state_owner(sp); - return err; -} - -static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) { - struct nfs4_exception exception = { }; - struct nfs4_state *res = ERR_PTR(-EIO); - int err; + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && + !(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; - do { - err = _nfs4_open_delegated(inode, flags, cred, &res); - if (err == 0) - break; - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), - err, &exception)); - } while (exception.retry); - return res; + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && + !(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; } /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; - int status; + int status; /* Protect against reboot recovery conflicts */ status = -ENOMEM; @@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st status = nfs4_recover_expired_lease(server); if (status != 0) goto err_put_state_owner; + if (path->dentry->d_inode != NULL) + nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); down_read(&clp->cl_sem); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) goto err_release_rwsem; + if (path->dentry->d_inode != NULL) + opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); + status = _nfs4_proc_open(opendata); if (status != 0) - goto err_opendata_free; + goto err_opendata_put; + + if (opendata->o_arg.open_flags & O_EXCL) + nfs4_exclusive_attrset(opendata, sattr); - status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); - if (state == NULL) - goto err_opendata_free; - if (opendata->o_res.delegation_type != 0) - nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); - nfs4_opendata_free(opendata); + status = PTR_ERR(state); + if (IS_ERR(state)) + goto err_opendata_put; + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; return 0; -err_opendata_free: - nfs4_opendata_free(opendata); +err_opendata_put: + nfs4_opendata_put(opendata); err_release_rwsem: up_read(&clp->cl_sem); err_put_state_owner: @@ -1006,14 +1105,14 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + printk(KERN_WARNING "NFS: v4 server %s " + " returned a bad sequence-id error!\n", + NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; continue; } @@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, exception.retry = 1; continue; } + if (status == -EAGAIN) { + /* We must have found a delegation */ + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); @@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, } struct nfs4_closedata { + struct path path; struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; @@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); + dput(calldata->path.dentry); + mntput(calldata->path.mnt); kfree(calldata); } @@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: - memcpy(&state->stateid, &calldata->res.stateid, - sizeof(state->stateid)); + nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: @@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) .rpc_resp = &calldata->res, .rpc_cred = state->owner->so_cred, }; - int mode = 0, old_mode; + int clear_rd, clear_wr, clear_rdwr; + int mode; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - /* Recalculate the new open mode in case someone reopened the file - * while we were waiting in line to be scheduled. - */ + + mode = FMODE_READ|FMODE_WRITE; + clear_rd = clear_wr = clear_rdwr = 0; spin_lock(&state->owner->so_lock); - spin_lock(&calldata->inode->i_lock); - mode = old_mode = state->state; + /* Calculate the change in open mode */ if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { mode &= ~FMODE_READ; - if (state->n_wronly == 0) + clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { mode &= ~FMODE_WRITE; + clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } } - nfs4_state_set_mode_locked(state, mode); - spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); - if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { + if (!clear_rd && !clear_wr && !clear_rdwr) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; return; @@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +int nfs4_do_close(struct path *path, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; - calldata->inode = inode; + calldata->inode = state->inode; calldata->state = state; - calldata->arg.fh = NFS_FH(inode); - calldata->arg.stateid = &state->stateid; + calldata->arg.fh = NFS_FH(state->inode); + calldata->arg.stateid = &state->open_stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) @@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.server = server; + calldata->path.mnt = mntget(path->mnt); + calldata->path.dentry = dget(path->dentry); - status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); - if (status == 0) - goto out; - - nfs_free_seqid(calldata->arg.seqid); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; out_free_calldata: kfree(calldata); out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) { struct file *filp; + int ret; - filp = lookup_instantiate_filp(nd, dentry, NULL); + /* If the open_intent is for execute, we have an extra check to make */ + if (nd->intent.open.flags & FMODE_EXEC) { + ret = _nfs4_do_access(state->inode, + state->owner->so_cred, + nd->intent.open.flags); + if (ret < 0) + goto out_close; + } + filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; return 0; } - nfs4_close_state(state, nd->intent.open.flags); - return PTR_ERR(filp); + ret = PTR_ERR(filp); +out_close: + nfs4_close_state(path, state, nd->intent.open.flags); + return ret; } struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; @@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; - state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) @@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) dentry = res; - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return res; } int nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct rpc_cred *cred; struct nfs4_state *state; cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - state = nfs4_open_delegated(dentry->d_inode, openflags, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { @@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return 1; } - nfs4_close_state(state, openflags); + nfs4_close_state(&path, state, openflags); out_drop: d_drop(dentry); return 0; @@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, dprintk("NFS call lookupfh %s\n", name->name); status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply lookupfh: %d\n", status); - if (status == -NFS4ERR_MOVED) - status = -EREMOTE; return status; } @@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_lookupfh(server, dirfh, name, - fhandle, fattr), - &exception); + err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); + /* FIXME: !!!! */ + if (err == -NFS4ERR_MOVED) { + err = -EREMOTE; + break; + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int status; - struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_lookup_arg args = { - .bitmask = server->attr_bitmask, - .dir_fh = NFS_FH(dir), - .name = name, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; - - nfs_fattr_init(fattr); + int status; dprintk("NFS call lookup %s\n", name->name); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); if (status == -NFS4ERR_MOVED) status = nfs4_get_referral(dir, name, fattr, fhandle); dprintk("NFS reply lookup: %d\n", status); @@ -1752,6 +1870,10 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct nfs4_state *state; struct rpc_cred *cred; int status = 0; @@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = PTR_ERR(cred); goto out; } - state = nfs4_do_open(dir, dentry, flags, sattr, cred); + state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); + nfs_post_op_update_inode(state->inode, &fattr); } - if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) - status = nfs4_intent_set_file(nd, dentry, state); + if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) + status = nfs4_intent_set_file(nd, &path, state); else - nfs4_close_state(state, flags); + nfs4_close_state(&path, state, flags); out: return status; } @@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id; + arg.lock_owner.id = lsp->ls_id.id; status = rpc_call_sync(server->client, &msg, 0); switch (status) { case 0: @@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, { struct nfs4_unlockdata *data; + /* Ensure this is an unlock - when canceling a lock, the + * canceled lock is passed in, and it won't be an unlock. + */ + fl->fl_type = F_UNLCK; + data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { nfs_free_seqid(seqid); @@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id; + p->arg.lock_owner.id = lsp->ls_id.id; p->lsp = lsp; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); @@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8ed79d5c54f..e9662ba81d8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -38,12 +38,14 @@ * subsequent patch. */ +#include <linux/kernel.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/kthread.h> #include <linux/module.h> +#include <linux/random.h> #include <linux/workqueue.h> #include <linux/bitops.h> @@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -u32 -nfs4_alloc_lockowner_id(struct nfs_client *clp) -{ - return clp->cl_lockowner_id ++; -} - -static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) -{ - struct nfs4_state_owner *sp = NULL; - - if (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list); - atomic_inc(&sp->so_count); - sp->so_cred = cred; - list_move(&sp->so_list, &clp->cl_state_owners); - clp->cl_nunused--; - } - return sp; -} - struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct rpc_cred *cred = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); if (list_empty(&sp->so_states)) continue; cred = get_rpccred(sp->so_cred); @@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; - if (!list_empty(&clp->cl_state_owners)) { - sp = list_entry(clp->cl_state_owners.next, - struct nfs4_state_owner, so_list); + pos = rb_first(&clp->cl_state_owners); + if (pos != NULL) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); return get_rpccred(sp->so_cred); } return NULL; } +static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, + __u64 minval, int maxbits) +{ + struct rb_node **p, *parent; + struct nfs_unique_id *pos; + __u64 mask = ~0ULL; + + if (maxbits < 64) + mask = (1ULL << maxbits) - 1ULL; + + /* Ensure distribution is more or less flat */ + get_random_bytes(&new->id, sizeof(new->id)); + new->id &= mask; + if (new->id < minval) + new->id += minval; +retry: + p = &root->rb_node; + parent = NULL; + + while (*p != NULL) { + parent = *p; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + + if (new->id < pos->id) + p = &(*p)->rb_left; + else if (new->id > pos->id) + p = &(*p)->rb_right; + else + goto id_exists; + } + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + return; +id_exists: + for (;;) { + new->id++; + if (new->id < minval || (new->id & mask) != new->id) { + new->id = minval; + break; + } + parent = rb_next(parent); + if (parent == NULL) + break; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + if (new->id < pos->id) + break; + } + goto retry; +} + +static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) +{ + rb_erase(&id->rb_node, root); +} + static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) { + struct nfs_client *clp = server->nfs_client; + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; struct nfs4_state_owner *sp, *res = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_cred != cred) + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (server < sp->so_server) { + p = &parent->rb_left; continue; - atomic_inc(&sp->so_count); - /* Move to the head of the list */ - list_move(&sp->so_list, &clp->cl_state_owners); - res = sp; - break; + } + if (server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (cred < sp->so_cred) + p = &parent->rb_left; + else if (cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + res = sp; + break; + } } return res; } +static struct nfs4_state_owner * +nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) +{ + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; + struct nfs4_state_owner *sp; + + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (new->so_server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (new->so_server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (new->so_cred < sp->so_cred) + p = &parent->rb_left; + else if (new->so_cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + return sp; + } + } + nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); + rb_link_node(&new->so_client_node, parent, p); + rb_insert_color(&new->so_client_node, &clp->cl_state_owners); + return new; +} + +static void +nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) +{ + if (!RB_EMPTY_NODE(&sp->so_client_node)) + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs_client *clp = sp->so_client; - spin_lock(&clp->cl_lock); - list_del_init(&sp->so_list); - spin_unlock(&clp->cl_lock); + if (!RB_EMPTY_NODE(&sp->so_client_node)) { + struct nfs_client *clp = sp->so_client; + + spin_lock(&clp->cl_lock); + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + RB_CLEAR_NODE(&sp->so_client_node); + spin_unlock(&clp->cl_lock); + } } /* @@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; - get_rpccred(cred); - new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_find_state_owner(clp, cred); - if (sp == NULL) - sp = nfs4_client_grab_unused(clp, cred); - if (sp == NULL && new != NULL) { - list_add(&new->so_list, &clp->cl_state_owners); - new->so_client = clp; - new->so_id = nfs4_alloc_lockowner_id(clp); - new->so_cred = cred; - sp = new; - new = NULL; - } + sp = nfs4_find_state_owner(server, cred); spin_unlock(&clp->cl_lock); - kfree(new); if (sp != NULL) return sp; - put_rpccred(cred); - return NULL; + new = nfs4_alloc_state_owner(); + if (new == NULL) + return NULL; + new->so_client = clp; + new->so_server = server; + new->so_cred = cred; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner(clp, new); + spin_unlock(&clp->cl_lock); + if (sp == new) + get_rpccred(cred); + else + kfree(new); + return sp; } /* @@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) - goto out_free; - if (list_empty(&sp->so_list)) - goto out_free; - list_move(&sp->so_list, &clp->cl_unused); - clp->cl_nunused++; - spin_unlock(&clp->cl_lock); - put_rpccred(cred); - cred = NULL; - return; -out_free: - list_del(&sp->so_list); + nfs4_remove_state_owner(clp, sp); spin_unlock(&clp->cl_lock); put_rpccred(cred); kfree(sp); @@ -236,6 +325,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); + seqlock_init(&state->seqlock); return state; } @@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { - /* Is this in the process of being freed? */ - if (state->state == 0) + if (state->owner != owner) continue; - if (state->owner == owner) { - atomic_inc(&state->count); + if (atomic_inc_not_zero(&state->count)) return state; - } } return NULL; } @@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -void nfs4_close_state(struct nfs4_state *state, mode_t mode) +void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { - struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - int oldstate, newstate = 0; + int call_close = 0; + int newstate; atomic_inc(&owner->so_count); /* Protect against nfs4_find_state() */ spin_lock(&owner->so_lock); - spin_lock(&inode->i_lock); switch (mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_READ: state->n_rdonly--; @@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) case FMODE_READ|FMODE_WRITE: state->n_rdwr--; } - oldstate = newstate = state->state; + newstate = FMODE_READ|FMODE_WRITE; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { newstate &= ~FMODE_READ; - if (state->n_wronly == 0) + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { newstate &= ~FMODE_WRITE; + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (newstate == 0) + clear_bit(NFS_DELEGATED_STATE, &state->flags); } - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - nfs4_state_set_mode_locked(state, newstate); - oldstate = newstate; - } - spin_unlock(&inode->i_lock); + nfs4_state_set_mode_locked(state, newstate); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(inode, state) == 0) - return; - nfs4_put_open_state(state); - nfs4_put_state_owner(owner); + if (!call_close) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state); } /* @@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); - lsp->ls_id = nfs4_alloc_lockowner_id(clp); + nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); spin_unlock(&clp->cl_lock); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } +static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +{ + struct nfs_client *clp = lsp->ls_state->owner->so_client; + + spin_lock(&clp->cl_lock); + nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); + spin_unlock(&clp->cl_lock); + kfree(lsp); +} + /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ return NULL; } spin_unlock(&state->state_lock); - kfree(new); + if (new != NULL) + nfs4_free_lock_state(new); return lsp; } @@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - kfree(lsp); + nfs4_free_lock_state(lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; + int seq; - memcpy(dst, &state->stateid, sizeof(*dst)); + do { + seq = read_seqbegin(&state->seqlock); + memcpy(dst, &state->stateid, sizeof(*dst)); + } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) return; @@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid) * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() */ -static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { switch (status) { case 0: break; case -NFS4ERR_BAD_SEQID: + if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) + return; + printk(KERN_WARNING "NFS: v4 server returned a bad" + "sequence-id error on an" + "unconfirmed sequence %p!\n", + seqid->sequence); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); } - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } /* @@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) */ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) { - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) @@ -748,15 +860,21 @@ out_err: static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state *state; struct nfs4_lock_state *lock; /* Reset all sequence ids to zero */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); sp->so_seqid.counter = 0; sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { + clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); list_for_each_entry(lock, &state->lock_states, ls_locks) { lock->ls_seqid.counter = 0; lock->ls_seqid.flags = 0; @@ -771,6 +889,7 @@ static int reclaimer(void *ptr) { struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; int status = 0; @@ -816,7 +935,8 @@ restart_loop: /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { if (status == -NFS4ERR_NO_GRACE) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8003c91ccb9..c08738441f7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int); #endif /* lock,open owner id: - * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define owner_id_maxsz (1 + 1) +#define open_owner_id_maxsz (1 + 4) +#define lock_owner_id_maxsz (1 + 4) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int); #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + 2 * nfs4_name_maxsz)) + 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int); 3 + (NFS4_VERIFIER_SIZE >> 2)) #define decode_setclientid_confirm_maxsz \ (op_decode_hdr_maxsz) -#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ - 1 + ((3 + NFS4_FHSIZE) >> 2)) +#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) +#define decode_lookup_maxsz (op_decode_hdr_maxsz) +#define encode_share_access_maxsz \ + (2) +#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz) +#define encode_opentype_maxsz (1 + encode_createmode_maxsz) +#define encode_claim_null_maxsz (1 + nfs4_name_maxsz) +#define encode_open_maxsz (op_encode_hdr_maxsz + \ + 2 + encode_share_access_maxsz + 2 + \ + open_owner_id_maxsz + \ + encode_opentype_maxsz + \ + encode_claim_null_maxsz) +#define decode_ace_maxsz (3 + nfs4_owner_maxsz) +#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \ + decode_ace_maxsz) +#define decode_change_info_maxsz (5) +#define decode_open_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + decode_change_info_maxsz + 1 + \ + nfs4_fattr_bitmap_maxsz + \ + decode_delegation_maxsz) #define encode_remove_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define encode_rename_maxsz (op_encode_hdr_maxsz + \ @@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int); #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ nfs4_fattr_maxsz) -#define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define decode_create_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + nfs4_fattr_bitmap_maxsz) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) +#define encode_fs_locations_maxsz \ + (encode_getattr_maxsz) +#define decode_fs_locations_maxsz \ + (0) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ @@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 2 + \ decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ - encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 13 + 3 + 2 + 64 + \ - encode_getattr_maxsz + \ - encode_getfh_maxsz) + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_open_maxsz + \ + encode_getfh_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ - decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz + \ - decode_getfh_maxsz) + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_open_maxsz + \ + decode_getfh_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 4) #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 11) + encode_open_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - 4 + 5 + 2 + 3) + decode_open_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int); op_encode_hdr_maxsz + \ 1 + 1 + 2 + 2 + \ 1 + 4 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz + \ op_decode_hdr_maxsz + \ 2 + 2 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ 1 + 2 + 2 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int); encode_getfh_maxsz) #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ + decode_lookup_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ @@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_fs_locations_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz) + encode_lookup_maxsz + \ + encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz) + decode_lookup_maxsz + \ + decode_fs_locations_maxsz) static struct { unsigned int mode; @@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITE64(nfs4_lock_length(args->fl)); WRITE32(args->new_lock_owner); if (args->new_lock_owner){ - RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); WRITE32(args->open_seqid->sequence->counter); WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); } else { RESERVE_SPACE(NFS4_STATEID_SIZE+4); @@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg { __be32 *p; - RESERVE_SPACE(40); + RESERVE_SPACE(52); WRITE32(OP_LOCKT); WRITE32(nfs4_lock_type(args->fl, 0)); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); return 0; } @@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena WRITE32(OP_OPEN); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); - RESERVE_SPACE(16); + RESERVE_SPACE(28); WRITE64(arg->clientid); - WRITE32(4); - WRITE32(arg->id); + WRITE32(16); + WRITEMEM("open id:", 8); + WRITE64(arg->id); } static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) @@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; __be32 *p; @@ -1735,7 +1772,7 @@ out: */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { .nops = 2, }; @@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs struct compound_hdr hdr = { .nops = 3, }; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; int status; @@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { __be32 *p; - uint32_t bmlen; + uint32_t savewords, bmlen, i; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) goto xdr_error; READ_BUF(bmlen << 2); - p += bmlen; + savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); + for (i = 0; i < savewords; ++i) + READ32(res->attrset[i]); + for (; i < NFS4_BITMAP_SIZE; i++) + res->attrset[i] = 0; + return decode_delegation(xdr, res); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 49d1008ce1d..3490322d114 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpcb_getport_external(&sin, program, version, proto); + return rpcb_getport_sync(&sin, program, version, proto); } @@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void) NFS_MNT3_VERSION : NFS_MNT_VERSION; set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); + status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, + nfs_path, version, protocol, &fh); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " "while mounting %s\n", status, nfs_path); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c5bb51a29e8..f56dae5216f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - atomic_set(&req->wb_count, 1); req->wb_context = get_nfs_open_context(ctx); - + kref_init(&req->wb_kref); return req; } @@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req) } /** - * nfs_set_page_writeback_locked - Lock a request for writeback + * nfs_set_page_tag_locked - Tag a request as locked * @req: */ -int nfs_set_page_writeback_locked(struct nfs_page *req) +static int nfs_set_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (!nfs_lock_request(req)) return 0; - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } /** - * nfs_clear_page_writeback - Unlock request and wake up sleepers + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers */ -void nfs_clear_page_writeback(struct nfs_page *req) +void nfs_clear_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); if (req->wb_page != NULL) { - spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); - spin_unlock(&nfsi->req_lock); + spin_lock(&inode->i_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); } nfs_unlock_request(req); } @@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req) * * Note: Should never be called with the spinlock held! */ -void -nfs_release_request(struct nfs_page *req) +static void nfs_free_request(struct kref *kref) { - if (!atomic_dec_and_test(&req->wb_count)) - return; + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); /* Release struct file or cached credential */ nfs_clear_request(req); @@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } +void nfs_release_request(struct nfs_page *req) +{ + kref_put(&req->wb_kref, nfs_free_request); +} + static int nfs_wait_bit_interruptible(void *word) { int ret = 0; @@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word) int nfs_wait_on_request(struct nfs_page *req) { - struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode); sigset_t oldmask; int ret = 0; @@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) /** * nfs_scan_list - Scan a list for matching requests * @nfsi: NFS inode - * @head: One of the NFS inode request lists * @dst: Destination list * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for * * Moves elements from one of the inode request lists. * If the number of requests is set to 0, the entire address_space * starting at index idx_start, is scanned. * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's req_lock when calling this function + * You must be holding the inode's i_lock when calling this function */ -int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, - unsigned int npages) + unsigned int npages, int tag) { struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; @@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, idx_end = idx_start + npages - 1; for (;;) { - found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES); + NFS_SCAN_MAXENTRIES, tag); if (found <= 0) break; for (i = 0; i < found; i++) { @@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, if (req->wb_index > idx_end) goto out; idx_start = req->wb_index + 1; - if (req->wb_list_head != head) - continue; - if (nfs_set_page_writeback_locked(req)) { + if (nfs_set_page_tag_locked(req)) { nfs_list_remove_request(req); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); nfs_list_add_request(req, dst); res++; + if (res == INT_MAX) + goto out; } } - + /* for latency reduction */ + cond_resched_lock(&nfsi->vfs_inode.i_lock); } out: return res; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7bd7cb95c03..6ae2e58ed05 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_clear_request(req); @@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, int flags; data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page) */ error = nfs_wb_page(inode, page); if (error) - goto out_error; + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; error = -ESTALE; if (NFS_STALE(inode)) - goto out_error; + goto out_unlock; if (file == NULL) { error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - goto out_error; + goto out_unlock; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page) put_nfs_open_context(ctx); return error; - -out_error: +out_unlock: unlock_page(page); return error; } @@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page) struct inode *inode = page->mapping->host; struct nfs_page *new; unsigned int len; + int error; + + error = nfs_wb_page(inode, page); + if (error) + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; - nfs_wb_page(inode, page); len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + new = nfs_create_request(desc->ctx, inode, page, 0, len); - if (IS_ERR(new)) { - SetPageError(page); - unlock_page(page); - return PTR_ERR(new); - } + if (IS_ERR(new)) + goto out_error; + if (len < PAGE_CACHE_SIZE) zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; +out_error: + error = PTR_ERR(new); + SetPageError(page); +out_unlock: + unlock_page(page); + return error; } int nfs_readpages(struct file *filp, struct address_space *mapping, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ca20d3cc260..a2b1af89ca1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -45,6 +45,7 @@ #include <linux/inet.h> #include <linux/nfs_xdr.h> #include <linux/magic.h> +#include <linux/parser.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -57,6 +58,167 @@ #define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + + struct { + struct sockaddr_in address; + unsigned int program; + unsigned int version; + unsigned short port; + int protocol; + } mount_server; + + struct { + struct sockaddr_in address; + char *hostname; + char *export_path; + unsigned int program; + int protocol; + } nfs_server; +}; + +enum { + /* Mount options that take no arguments */ + Opt_soft, Opt_hard, + Opt_intr, Opt_nointr, + Opt_posix, Opt_noposix, + Opt_cto, Opt_nocto, + Opt_ac, Opt_noac, + Opt_lock, Opt_nolock, + Opt_v2, Opt_v3, + Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, + Opt_rdirplus, Opt_nordirplus, + Opt_sharecache, Opt_nosharecache, + + /* Mount options that take integer arguments */ + Opt_port, + Opt_rsize, Opt_wsize, Opt_bsize, + Opt_timeo, Opt_retrans, + Opt_acregmin, Opt_acregmax, + Opt_acdirmin, Opt_acdirmax, + Opt_actimeo, + Opt_namelen, + Opt_mountport, + Opt_mountprog, Opt_mountvers, + Opt_nfsprog, Opt_nfsvers, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, + Opt_addr, Opt_mounthost, Opt_clientaddr, + + /* Mount options that are ignored */ + Opt_userspace, Opt_deprecated, + + Opt_err +}; + +static match_table_t nfs_mount_option_tokens = { + { Opt_userspace, "bg" }, + { Opt_userspace, "fg" }, + { Opt_soft, "soft" }, + { Opt_hard, "hard" }, + { Opt_intr, "intr" }, + { Opt_nointr, "nointr" }, + { Opt_posix, "posix" }, + { Opt_noposix, "noposix" }, + { Opt_cto, "cto" }, + { Opt_nocto, "nocto" }, + { Opt_ac, "ac" }, + { Opt_noac, "noac" }, + { Opt_lock, "lock" }, + { Opt_nolock, "nolock" }, + { Opt_v2, "v2" }, + { Opt_v3, "v3" }, + { Opt_udp, "udp" }, + { Opt_tcp, "tcp" }, + { Opt_acl, "acl" }, + { Opt_noacl, "noacl" }, + { Opt_rdirplus, "rdirplus" }, + { Opt_nordirplus, "nordirplus" }, + { Opt_sharecache, "sharecache" }, + { Opt_nosharecache, "nosharecache" }, + + { Opt_port, "port=%u" }, + { Opt_rsize, "rsize=%u" }, + { Opt_wsize, "wsize=%u" }, + { Opt_bsize, "bsize=%u" }, + { Opt_timeo, "timeo=%u" }, + { Opt_retrans, "retrans=%u" }, + { Opt_acregmin, "acregmin=%u" }, + { Opt_acregmax, "acregmax=%u" }, + { Opt_acdirmin, "acdirmin=%u" }, + { Opt_acdirmax, "acdirmax=%u" }, + { Opt_actimeo, "actimeo=%u" }, + { Opt_userspace, "retry=%u" }, + { Opt_namelen, "namlen=%u" }, + { Opt_mountport, "mountport=%u" }, + { Opt_mountprog, "mountprog=%u" }, + { Opt_mountvers, "mountvers=%u" }, + { Opt_nfsprog, "nfsprog=%u" }, + { Opt_nfsvers, "nfsvers=%u" }, + { Opt_nfsvers, "vers=%u" }, + + { Opt_sec, "sec=%s" }, + { Opt_proto, "proto=%s" }, + { Opt_mountproto, "mountproto=%s" }, + { Opt_addr, "addr=%s" }, + { Opt_clientaddr, "clientaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, + + { Opt_err, NULL } +}; + +enum { + Opt_xprt_udp, Opt_xprt_tcp, + + Opt_xprt_err +}; + +static match_table_t nfs_xprt_protocol_tokens = { + { Opt_xprt_udp, "udp" }, + { Opt_xprt_tcp, "tcp" }, + + { Opt_xprt_err, NULL } +}; + +enum { + Opt_sec_none, Opt_sec_sys, + Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, + Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, + Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, + + Opt_sec_err +}; + +static match_table_t nfs_secflavor_tokens = { + { Opt_sec_none, "none" }, + { Opt_sec_none, "null" }, + { Opt_sec_sys, "sys" }, + + { Opt_sec_krb5, "krb5" }, + { Opt_sec_krb5i, "krb5i" }, + { Opt_sec_krb5p, "krb5p" }, + + { Opt_sec_lkey, "lkey" }, + { Opt_sec_lkeyi, "lkeyi" }, + { Opt_sec_lkeyp, "lkeyp" }, + + { Opt_sec_err, NULL } +}; + + static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); @@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { RPC_AUTH_GSS_SPKM, "spkm" }, { RPC_AUTH_GSS_SPKMI, "spkmi" }, { RPC_AUTH_GSS_SPKMP, "spkmp" }, - { -1, "unknown" } + { UINT_MAX, "unknown" } }; int i; - for (i=0; sec_flavours[i].flavour != -1; i++) { + for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) { if (sec_flavours[i].flavour == flavour) break; } @@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; @@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { + struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); + struct rpc_clnt *rpc; + shrink_submounts(vfsmnt, &nfs_automount_list); + + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); } /* - * Validate the NFS2/NFS3 mount data - * - fills in the mount root filehandle + * Sanity-check a server address provided by the mount command */ -static int nfs_validate_mount_data(struct nfs_mount_data *data, - struct nfs_fh *mntfh) +static int nfs_verify_server_address(struct sockaddr *addr) { - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *) addr; + if (sa->sin_addr.s_addr != INADDR_ANY) + return 1; + break; + } } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; + return 0; +} + +/* + * Error-check and convert a string of mount options from user space into + * a data structure + */ +static int nfs_parse_mount_options(char *raw, + struct nfs_parsed_mount_data *mnt) +{ + char *p, *string; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - return -EINVAL; + while ((p = strsep(&raw, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int option, token; + + if (!*p) + continue; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { + case Opt_soft: + mnt->flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + mnt->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_intr: + mnt->flags |= NFS_MOUNT_INTR; + break; + case Opt_nointr: + mnt->flags &= ~NFS_MOUNT_INTR; + break; + case Opt_posix: + mnt->flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + mnt->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + mnt->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + mnt->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + mnt->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + mnt->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + mnt->flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + mnt->flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case Opt_v3: + mnt->flags |= NFS_MOUNT_VER3; + break; + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + case Opt_acl: + mnt->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + mnt->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + mnt->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + mnt->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + mnt->flags |= NFS_MOUNT_UNSHARED; + break; + + case Opt_port: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->nfs_server.address.sin_port = htonl(option); + break; + case Opt_rsize: + if (match_int(args, &mnt->rsize)) + return 0; + break; + case Opt_wsize: + if (match_int(args, &mnt->wsize)) + return 0; + break; + case Opt_bsize: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->bsize = option; + break; + case Opt_timeo: + if (match_int(args, &mnt->timeo)) + return 0; + break; + case Opt_retrans: + if (match_int(args, &mnt->retrans)) + return 0; + break; + case Opt_acregmin: + if (match_int(args, &mnt->acregmin)) + return 0; + break; + case Opt_acregmax: + if (match_int(args, &mnt->acregmax)) + return 0; + break; + case Opt_acdirmin: + if (match_int(args, &mnt->acdirmin)) + return 0; + break; + case Opt_acdirmax: + if (match_int(args, &mnt->acdirmax)) + return 0; + break; + case Opt_actimeo: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->acregmin = + mnt->acregmax = + mnt->acdirmin = + mnt->acdirmax = option; + break; + case Opt_namelen: + if (match_int(args, &mnt->namlen)) + return 0; + break; + case Opt_mountport: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->mount_server.port = option; + break; + case Opt_mountprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.program = option; + break; + case Opt_mountvers: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.version = option; + break; + case Opt_nfsprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->nfs_server.program = option; + break; + case Opt_nfsvers: + if (match_int(args, &option)) + return 0; + switch (option) { + case 2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case 3: + mnt->flags |= NFS_MOUNT_VER3; + break; + default: + goto out_unrec_vers; } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - return -EINVAL; + break; + + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_secflavor_tokens, args); + kfree(string); + + /* + * The flags setting is for v2/v3. The flavor_len + * setting is for v4. v2/v3 also need to know the + * difference between NULL and UNIX. + */ + switch (token) { + case Opt_sec_none: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + goto out_unrec_sec; } - case 5: - memset(data->context, 0, sizeof(data->context)); - } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_udp: + mnt->mount_server.protocol = IPPROTO_UDP; + break; + case Opt_tcp: + mnt->mount_server.protocol = IPPROTO_TCP; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->nfs_server.address.sin_family = AF_INET; + mnt->nfs_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; + case Opt_clientaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->client_address = string; + break; + case Opt_mounthost: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->mount_server.address.sin_family = AF_INET; + mnt->mount_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; - /* Set the pseudoflavor */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; + case Opt_userspace: + case Opt_deprecated: + break; -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - return -EPROTONOSUPPORT; + default: + goto out_unknown; + } } -#endif /* CONFIG_NFS_V3 */ - /* We now require that the mount process passes the remote address */ - if (data->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - return -EINVAL; + return 1; + +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return 0; + +out_unrec_vers: + printk(KERN_INFO "NFS: unrecognized NFS version number\n"); + return 0; + +out_unrec_xprt: + printk(KERN_INFO "NFS: unrecognized transport protocol\n"); + return 0; + +out_unrec_sec: + printk(KERN_INFO "NFS: unrecognized security flavor\n"); + return 0; + +out_unknown: + printk(KERN_INFO "NFS: unknown mount option: %s\n", p); + return 0; +} + +/* + * Use the remote server's MOUNT service to request the NFS file handle + * corresponding to the provided path. + */ +static int nfs_try_mount(struct nfs_parsed_mount_data *args, + struct nfs_fh *root_fh) +{ + struct sockaddr_in sin; + int status; + + if (args->mount_server.version == 0) { + if (args->flags & NFS_MOUNT_VER3) + args->mount_server.version = NFS_MNT3_VERSION; + else + args->mount_server.version = NFS_MNT_VERSION; } - /* Prepare the root filehandle */ - if (data->flags & NFS_MOUNT_VER3) - mntfh->size = data->root.size; + /* + * Construct the mount server's address. + */ + if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY) + sin = args->mount_server.address; else - mntfh->size = NFS2_FHSIZE; + sin = args->nfs_server.address; + if (args->mount_server.port == 0) { + status = rpcb_getport_sync(&sin, + args->mount_server.program, + args->mount_server.version, + args->mount_server.protocol); + if (status < 0) + goto out_err; + sin.sin_port = htons(status); + } else + sin.sin_port = htons(args->mount_server.port); + + /* + * Now ask the mount server to map our export path + * to a file handle. + */ + status = nfs_mount((struct sockaddr *) &sin, + sizeof(sin), + args->nfs_server.hostname, + args->nfs_server.export_path, + args->mount_server.version, + args->mount_server.protocol, + root_fh); + if (status < 0) + goto out_err; + + return status; - if (mntfh->size > sizeof(mntfh->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - return -EINVAL; +out_err: + dfprintk(MOUNT, "NFS: unable to contact server on host " + NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); + return status; +} + +/* + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + * + * XXX: as far as I can tell, changing the NFS program number is not + * supported in the NFS client. + */ +static int nfs_validate_mount_data(struct nfs_mount_data **options, + struct nfs_fh *mntfh, + const char *dev_name) +{ + struct nfs_mount_data *data = *options; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + case 5: + memset(data->context, 0, sizeof(data->context)); + case 6: + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; + + if (mntfh->size > sizeof(mntfh->data)) + goto out_invalid_fh; + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + break; + default: { + unsigned int len; + char *c; + int status; + struct nfs_parsed_mount_data args = { + .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .mount_server.protocol = IPPROTO_UDP, + .mount_server.program = NFS_MNT_PROGRAM, + .nfs_server.protocol = IPPROTO_TCP, + .nfs_server.program = NFS_PROGRAM, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) + return -EINVAL; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* + * NB: after this point, caller will free "data" + * if we return an error + */ + *options = data; + + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + len = c - dev_name - 1; + if (len > sizeof(data->hostname)) + return -EINVAL; + strncpy(data->hostname, dev_name, len); + args.nfs_server.hostname = data->hostname; + + c++; + if (strlen(c) > NFS_MAXPATHLEN) + return -EINVAL; + args.nfs_server.export_path = c; + + status = nfs_try_mount(&args, mntfh); + if (status) + return -EINVAL; + + /* + * Translate to nfs_mount_data, which nfs_fill_super + * can deal with. + */ + data->version = 6; + data->flags = args.flags; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->addr = args.nfs_server.address; + data->namlen = args.namlen; + data->bsize = args.bsize; + data->pseudoflavor = args.auth_flavors[0]; + + break; + } } - memcpy(mntfh->data, data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + +#ifndef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + goto out_v3_not_compiled; +#endif /* !CONFIG_NFS_V3 */ + + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) + goto out_no_address; return 0; + +out_no_data: + dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_no_v3: + dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", + data->version); + return -EINVAL; + +out_no_sec: + dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); + return -EINVAL; + +#ifndef CONFIG_NFS_V3 +out_v3_not_compiled: + dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V3 */ + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_fh: + dfprintk(MOUNT, "NFS: invalid root filehandle\n"); + return -EINVAL; } /* @@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data) { struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->nfs_client != server->nfs_client) + if (memcmp(&old->nfs_client->cl_addr, + &server->nfs_client->cl_addr, + sizeof(old->nfs_client->cl_addr)) != 0) + return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) return 0; if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; return 1; } +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +{ + const struct nfs_server *a = s->s_fs_info; + const struct rpc_clnt *clnt_a = a->client; + const struct rpc_clnt *clnt_b = b->client; + + if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + goto Ebusy; + if (a->nfs_client != b->nfs_client) + goto Ebusy; + if (a->flags != b->flags) + goto Ebusy; + if (a->wsize != b->wsize) + goto Ebusy; + if (a->rsize != b->rsize) + goto Ebusy; + if (a->acregmin != b->acregmin) + goto Ebusy; + if (a->acregmax != b->acregmax) + goto Ebusy; + if (a->acdirmin != b->acdirmin) + goto Ebusy; + if (a->acdirmax != b->acdirmax) + goto Ebusy; + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + goto Ebusy; + return 0; +Ebusy: + return -EBUSY; +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; /* Validate the mount data */ - error = nfs_validate_mount_data(data, &mntfh); + error = nfs_validate_mount_data(&data, &mntfh, dev_name); if (error < 0) - return error; + goto out; /* Get a volume representation */ server = nfs_create_server(data, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; - return 0; + error = 0; + +out: + if (data != raw_data) + kfree(data); + return error; out_err_nosb: nfs_free_server(server); -out_err_noserver: - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - return error; + goto out; } /* @@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs_xdev_get_sb()\n"); @@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } -static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +/* + * Validate NFSv4 mount options + */ +static int nfs4_validate_mount_data(struct nfs4_mount_data **options, + const char *dev_name, + struct sockaddr_in *addr, + rpc_authflavor_t *authflavour, + char **hostname, + char **mntpath, + char **ip_addr) { - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - kfree(p); - return ERR_PTR(-EFAULT); + struct nfs4_mount_data *data = *options; + char *c; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + if (data->host_addrlen != sizeof(*addr)) + goto out_no_address; + if (copy_from_user(addr, data->host_addr, sizeof(*addr))) + return -EFAULT; + if (addr->sin_port == 0) + addr->sin_port = htons(NFS_PORT); + if (!nfs_verify_server_address((struct sockaddr *) addr)) + goto out_no_address; + + switch (data->auth_flavourlen) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + if (copy_from_user(authflavour, data->auth_flavours, + sizeof(*authflavour))) + return -EFAULT; + break; + default: + goto out_inval_auth; + } + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *mntpath = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + *ip_addr = c; + + break; + default: { + unsigned int len; + struct nfs_parsed_mount_data args = { + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .nfs_server.protocol = IPPROTO_TCP, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address((struct sockaddr *) + &args.nfs_server.address)) + return -EINVAL; + *addr = args.nfs_server.address; + + switch (args.auth_flavor_len) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; + break; + default: + goto out_inval_auth; + } + + /* + * Translate to nfs4_mount_data, which nfs4_fill_super + * can deal with. + */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + *options = data; + + data->version = 1; + data->flags = args.flags & NFS4_MOUNT_FLAGMASK; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->proto = args.nfs_server.protocol; + + /* + * Split "dev_name" into "hostname:mntpath". + */ + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + /* while calculating len, pretend ':' is '\0' */ + len = c - dev_name; + if (len > NFS4_MAXNAMLEN) + return -EINVAL; + *hostname = kzalloc(len, GFP_KERNEL); + if (*hostname == NULL) + return -ENOMEM; + strncpy(*hostname, dev_name, len - 1); + + c++; /* step over the ':' */ + len = strlen(c); + if (len > NFS4_MAXPATHLEN) + return -EINVAL; + *mntpath = kzalloc(len + 1, GFP_KERNEL); + if (*mntpath == NULL) + return -ENOMEM; + strncpy(*mntpath, c, len); + + dprintk("MNTPATH: %s\n", *mntpath); + + *ip_addr = args.client_address; + + break; + } } - dst[maxlen] = '\0'; - return dst; + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_inval_auth: + dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", + data->auth_flavourlen); + return -EINVAL; + +out_no_address: + dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); + return -EINVAL; } /* @@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type, rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; - char *mntpath = NULL, *hostname = NULL, ip_addr[16]; - void *p; + char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; - } - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(addr)) - return -EINVAL; - - if (copy_from_user(&addr, data->host_addr, sizeof(addr))) - return -EFAULT; - - if (addr.sin_family != AF_INET || - addr.sin_addr.s_addr == INADDR_ANY - ) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - return -EINVAL; - } - /* RFC3530: The default port for NFS is 2049 */ - if (addr.sin_port == 0) - addr.sin_port = htons(NFS_PORT); - - /* Grab the authentication type */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - error = -EINVAL; - goto out_err_noserver; - } - - if (copy_from_user(&authflavour, data->auth_flavours, - sizeof(authflavour))) { - error = -EFAULT; - goto out_err_noserver; - } - } - - p = nfs_copy_user_string(NULL, &data->hostname, 256); - if (IS_ERR(p)) - goto out_err; - hostname = p; - - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); - if (IS_ERR(p)) - goto out_err; - mntpath = p; - - dprintk("MNTPATH: %s\n", mntpath); - - p = nfs_copy_user_string(ip_addr, &data->client_addr, - sizeof(ip_addr) - 1); - if (IS_ERR(p)) - goto out_err; + /* Validate the mount data */ + error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, + &hostname, &mntpath, &ip_addr); + if (error < 0) + goto out; /* Get a volume representation */ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, authflavour, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; + error = 0; + +out: + kfree(ip_addr); kfree(mntpath); kfree(hostname); - return 0; - -out_err: - error = PTR_ERR(p); - goto out_err_noserver; + return error; out_free: nfs_free_server(server); -out_err_noserver: - kfree(mntpath); - kfree(hostname); - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - goto out_err_noserver; + goto out; } static void nfs4_kill_super(struct super_block *sb) @@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_xdev_get_sb()\n"); @@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot; struct nfs_fh mntfh; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_referral_get_sb()\n"); @@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af344a158e0..73ac992ece8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) if (PagePrivate(page)) { req = (struct nfs_page *)page_private(page); if (req != NULL) - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); } return req; } static struct nfs_page *nfs_page_find_request(struct page *page) { + struct inode *inode = page->mapping->host; struct nfs_page *req = NULL; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return req; } @@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, } /* Update file length */ nfs_grow_file(page, offset, count); - /* Set the PG_uptodate flag? */ - nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); return 0; } @@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page) static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { + struct inode *inode = page->mapping->host; + struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - spinlock_t *req_lock = &nfsi->req_lock; int ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); for(;;) { req = nfs_page_find_request_locked(page); if (req == NULL) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return 1; } if (nfs_lock_request_dontget(req)) @@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) return ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); } if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { /* This request is marked for commit */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); return 1; } if (nfs_set_page_writeback(page) != 0) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); BUG(); } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_WRITEBACK); + NFS_PAGE_TAG_LOCKED); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); return ret; } @@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (PageDirty(req->wb_page)) set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 0; } @@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); @@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); iput(inode); } else - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_clear_request(req); nfs_release_request(req); } @@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfsi->req_lock); - nfs_list_add_request(req, &nfsi->commit); + spin_lock(&inode->i_lock); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); - spin_unlock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); + spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } @@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u idx_end = idx_start + npages - 1; next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; BUG_ON(!NFS_WBACK_BUSY(req)); - atomic_inc(&req->wb_count); - spin_unlock(&nfsi->req_lock); + kref_get(&req->wb_kref); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); if (error < 0) return error; res++; @@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); + res = nfs_scan_list(nfsi, dst, idx_start, npages, + NFS_PAGE_TAG_COMMIT); nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); } return res; } @@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; pgoff_t rqend, end; @@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { if (!nfs_lock_request_dontget(req)) { int error; - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { @@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, } continue; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (new) nfs_release_request(new); break; @@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, nfs_lock_request_dontget(new); error = nfs_inode_add_request(inode, new); if (error) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(new); return ERR_PTR(error); } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return new; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) @@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + if (PageError(req->wb_page)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else if (!nfs_reschedule_unstable_write(req)) { + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } static inline int flush_task_priority(int how) @@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, * NB: take care not to mess about with data->commit et al. */ data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -885,7 +888,7 @@ out_bad: } nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) } if (nfs_write_need_commit(data)) { - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct inode *inode = page->mapping->host; - spin_lock(req_lock); + spin_lock(&inode->i_lock); if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { /* Do nothing we need to resend the writes */ } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { @@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) clear_bit(PG_NEED_COMMIT, &req->wb_flags); dprintk(" server reboot detected\n"); } - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); } else dprintk(" OK\n"); @@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) dprintk(" marked for commit\n"); goto next; } + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - inode = first->wb_context->dentry->d_inode; + inode = first->wb_context->path.dentry->d_inode; data->inode = inode; data->cred = first->wb_context->cred; @@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_mark_request_commit(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { @@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, + req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; @@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk(" mismatch\n"); nfs_redirty_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = { int nfs_commit_inode(struct inode *inode, int how) { - struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); int res; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (res) { int error = nfs_commit_list(inode, &head, how); if (error < 0) @@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); @@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr } } how &= ~FLUSH_NOCOMMIT; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) @@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr if (pages == 0) break; if (how & FLUSH_INVALIDATE) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_cancel_commit_list(&head); ret = pages; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); continue; } pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); + } while (ret >= 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return ret; } @@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; - spinlock_t *req_lock; struct nfs_page *req; int ret; @@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page) inode = mapping->host; if (!inode) goto out_raced; - req_lock = &NFS_I(inode)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_release_request(req); return ret; } ret = __set_page_dirty_nobuffers(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return ret; out_raced: return !TestSetPageDirty(page); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 864090edc28..5443c52b57a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - char clientname[16]; int status; if (atomic_read(&cb->cb_set)) @@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) memset(program->stats, 0, sizeof(cb->cb_stat)); program->stats->program = program; - /* Just here to make some printk's more useful: */ - snprintf(clientname, sizeof(clientname), - "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); - args.servername = clientname; - /* Create RPC client */ cb->cb_client = rpc_create(&args); if (IS_ERR(cb->cb_client)) { @@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp) goto out_err; } - /* Kick rpciod, put the call on the wire. */ - if (rpciod_up() != 0) - goto out_clnt; - /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) - goto out_rpciod; + goto out_release_clp; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); - goto out_rpciod; + goto out_release_clp; } return; -out_rpciod: +out_release_clp: atomic_dec(&clp->cl_count); - rpciod_down(); -out_clnt: rpc_shutdown_client(cb->cb_client); out_err: cb->cb_client = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cc8ce422ab..8c52913d7cb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp) if (clnt) { clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); - rpciod_down(); } } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 05707e2fcca..e2d1ce36b36 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -39,6 +39,7 @@ struct nlm_host { struct hlist_node h_hash; /* doubly linked list */ struct sockaddr_in h_addr; /* peer address */ + struct sockaddr_in h_saddr; /* our address (optional) */ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char * h_name; /* remote hostname */ u32 h_version; /* interface version */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 7e7f33a38fc..8726491de15 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -15,6 +15,7 @@ #include <linux/types.h> +#define NFS4_BITMAP_SIZE 2 #define NFS4_VERIFIER_SIZE 8 #define NFS4_STATEID_SIZE 16 #define NFS4_FHSIZE 128 diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83f831..a0dcf665565 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,7 @@ struct nfs4_mount_data { #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ -#define NFS4_MOUNT_FLAGMASK 0xFFFF +#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ +#define NFS4_MOUNT_FLAGMASK 0x9033 #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0543439a97a..c098ae194f7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -30,7 +30,9 @@ #ifdef __KERNEL__ #include <linux/in.h> +#include <linux/kref.h> #include <linux/mm.h> +#include <linux/namei.h> #include <linux/pagemap.h> #include <linux/rbtree.h> #include <linux/rwsem.h> @@ -69,9 +71,8 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { - atomic_t count; - struct vfsmount *vfsmnt; - struct dentry *dentry; + struct kref kref; + struct path path; struct rpc_cred *cred; struct nfs4_state *state; fl_owner_t lockowner; @@ -155,13 +156,9 @@ struct nfs_inode { /* * This is the list of dirty unwritten pages. */ - spinlock_t req_lock; - struct list_head dirty; - struct list_head commit; struct radix_tree_root nfs_page_tree; - unsigned int ndirty, - ncommit, + unsigned long ncommit, npages; /* Open contexts for shared mmap writes */ @@ -187,6 +184,7 @@ struct nfs_inode { #define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ #define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ +#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ /* * Bit offsets in flags field @@ -496,21 +494,18 @@ static inline void nfs3_forget_cached_acls(struct inode *inode) /* * linux/fs/mount_clnt.c - * (Used only by nfsroot module) */ -extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *, - int, int); +extern int nfs_mount(struct sockaddr *, size_t, char *, char *, + int, int, struct nfs_fh *); /* * inline functions */ -static inline loff_t -nfs_size_to_loff_t(__u64 size) +static inline loff_t nfs_size_to_loff_t(__u64 size) { - loff_t maxsz = (((loff_t) ULONG_MAX) << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE - 1; - if (size > maxsz) - return maxsz; + if (size > (__u64) OFFSET_MAX - 1) + return OFFSET_MAX - 1; return (loff_t) size; } @@ -557,6 +552,7 @@ extern void * nfs_root_data(void); #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 +#define NFSDBG_MOUNT 0x0400 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 52b4378311c..0cac49bc095 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,7 +16,6 @@ struct nfs_client { #define NFS_CS_INITING 1 /* busy initialising */ int cl_nfsversion; /* NFS protocol version */ unsigned long cl_res_state; /* NFS resources state */ -#define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ @@ -35,7 +34,8 @@ struct nfs_client { nfs4_verifier cl_confirm; unsigned long cl_state; - u32 cl_lockowner_id; + struct rb_root cl_openowner_id; + struct rb_root cl_lockowner_id; /* * The following rwsem ensures exclusive access to the server @@ -44,9 +44,7 @@ struct nfs_client { struct rw_semaphore cl_sem; struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; + struct rb_root cl_state_owners; spinlock_t cl_lock; unsigned long cl_lease_time; diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index cc8b9c59acb..a3ade89a64d 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -37,7 +37,7 @@ struct nfs_mount_data { int acdirmin; /* 1 */ int acdirmax; /* 1 */ struct sockaddr_in addr; /* 1 */ - char hostname[256]; /* 1 */ + char hostname[NFS_MAXNAMLEN + 1]; /* 1 */ int namlen; /* 2 */ unsigned int bsize; /* 3 */ struct nfs3_fh root; /* 4 */ @@ -62,6 +62,7 @@ struct nfs_mount_data { #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ +#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index bd193af8016..78e60798d10 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -16,12 +16,13 @@ #include <linux/sunrpc/auth.h> #include <linux/nfs_xdr.h> -#include <asm/atomic.h> +#include <linux/kref.h> /* * Valid flags for the radix tree */ -#define NFS_PAGE_TAG_WRITEBACK 0 +#define NFS_PAGE_TAG_LOCKED 0 +#define NFS_PAGE_TAG_COMMIT 1 /* * Valid flags for a dirty buffer @@ -33,8 +34,7 @@ struct nfs_inode; struct nfs_page { - struct list_head wb_list, /* Defines state of page: */ - *wb_list_head; /* read/write/commit */ + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ @@ -42,7 +42,7 @@ struct nfs_page { unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ - atomic_t wb_count; /* reference count */ + struct kref wb_kref; /* reference count */ unsigned long wb_flags; struct nfs_writeverf wb_verf; /* Commit cookie */ }; @@ -71,8 +71,8 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, - pgoff_t idx_start, unsigned int npages); +extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, + pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), @@ -84,12 +84,11 @@ extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern int nfs_set_page_writeback_locked(struct nfs_page *req); -extern void nfs_clear_page_writeback(struct nfs_page *req); +extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* - * Lock the page of an asynchronous request without incrementing the wb_count + * Lock the page of an asynchronous request without getting a new reference */ static inline int nfs_lock_request_dontget(struct nfs_page *req) @@ -98,14 +97,14 @@ nfs_lock_request_dontget(struct nfs_page *req) } /* - * Lock the page of an asynchronous request + * Lock the page of an asynchronous request and take a reference */ static inline int nfs_lock_request(struct nfs_page *req) { if (test_and_set_bit(PG_BUSY, &req->wb_flags)) return 0; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 1; } @@ -118,7 +117,6 @@ static inline void nfs_list_add_request(struct nfs_page *req, struct list_head *head) { list_add_tail(&req->wb_list, head); - req->wb_list_head = head; } @@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req) if (list_empty(&req->wb_list)) return; list_del_init(&req->wb_list); - req->wb_list_head = NULL; } static inline struct nfs_page * diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 10c26ed0db7..38d77681cf2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -119,7 +119,7 @@ struct nfs_openargs { struct nfs_seqid * seqid; int open_flags; __u64 clientid; - __u32 id; + __u64 id; union { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ @@ -144,6 +144,7 @@ struct nfs_openres { nfs4_stateid delegation; __u32 do_recall; __u64 maxsize; + __u32 attrset[NFS4_BITMAP_SIZE]; }; /* @@ -180,7 +181,7 @@ struct nfs_closeres { * */ struct nfs_lowner { __u64 clientid; - u32 id; + __u64 id; }; struct nfs_lock_args { diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 534cdc7be58..7a69ca3beba 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -16,6 +16,7 @@ #include <linux/sunrpc/xdr.h> #include <asm/atomic.h> +#include <linux/rcupdate.h> /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 @@ -30,22 +31,28 @@ struct auth_cred { /* * Client user credentials */ +struct rpc_auth; +struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ - struct rpc_credops * cr_ops; - unsigned long cr_expire; /* when to gc */ - atomic_t cr_count; /* ref count */ - unsigned short cr_flags; /* various flags */ + struct list_head cr_lru; /* lru garbage collection */ + struct rcu_head cr_rcu; + struct rpc_auth * cr_auth; + const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif + unsigned long cr_expire; /* when to gc */ + unsigned long cr_flags; /* various flags */ + atomic_t cr_count; /* ref count */ uid_t cr_uid; /* per-flavor data */ }; -#define RPCAUTH_CRED_NEW 0x0001 -#define RPCAUTH_CRED_UPTODATE 0x0002 +#define RPCAUTH_CRED_NEW 0 +#define RPCAUTH_CRED_UPTODATE 1 +#define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 @@ -56,10 +63,10 @@ struct rpc_cred { #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; - unsigned long nextgc; /* next garbage collection */ - unsigned long expire; /* cache expiry interval */ + spinlock_t lock; }; +struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ /* guess at number of u32's auth adds before @@ -69,7 +76,7 @@ struct rpc_auth { unsigned int au_verfsize; unsigned int au_flags; /* various flags */ - struct rpc_authops * au_ops; /* operations */ + const struct rpc_authops *au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss @@ -115,17 +122,19 @@ struct rpc_credops { void *, __be32 *, void *); }; -extern struct rpc_authops authunix_ops; -extern struct rpc_authops authnull_ops; -#ifdef CONFIG_SUNRPC_SECURE -extern struct rpc_authops authdes_ops; -#endif +extern const struct rpc_authops authunix_ops; +extern const struct rpc_authops authnull_ops; + +void __init rpc_init_authunix(void); +void __init rpcauth_init_module(void); +void __exit rpcauth_remove_module(void); -int rpcauth_register(struct rpc_authops *); -int rpcauth_unregister(struct rpc_authops *); +int rpcauth_register(const struct rpc_authops *); +int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); -void rpcauth_destroy(struct rpc_auth *); +void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); @@ -138,8 +147,9 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -int rpcauth_init_credcache(struct rpc_auth *, unsigned long); -void rpcauth_free_credcache(struct rpc_auth *); +int rpcauth_init_credcache(struct rpc_auth *); +void rpcauth_destroy_credcache(struct rpc_auth *); +void rpcauth_clear_credcache(struct rpc_cred_cache *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 2db2fbf3494..67658e17a37 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -75,6 +75,7 @@ struct gss_cl_ctx { struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; + struct rcu_head gc_rcu; }; struct gss_upcall_msg; @@ -85,11 +86,6 @@ struct gss_cred { struct gss_upcall_msg *gc_upcall; }; -#define gc_uid gc_base.cr_uid -#define gc_count gc_base.cr_count -#define gc_flags gc_base.cr_flags -#define gc_expire gc_base.cr_expire - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 66611423c8e..c0d9d14983b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -24,8 +24,10 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - atomic_t cl_count; /* Number of clones */ - atomic_t cl_users; /* number of references */ + struct kref cl_kref; /* Number of references */ + struct list_head cl_clients; /* Global list of clients */ + struct list_head cl_tasks; /* List of tasks */ + spinlock_t cl_lock; /* spinlock */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ @@ -41,9 +43,7 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1,/* use getport() */ - cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -98,6 +98,7 @@ struct rpc_create_args { int protocol; struct sockaddr *address; size_t addrsize; + struct sockaddr *saddress; struct rpc_timeout *timeout; char *servername; struct rpc_program *program; @@ -110,20 +111,20 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) -#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) -#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) -#define RPC_CLNT_CREATE_NOPING (1UL << 5) -#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); -int rpc_shutdown_client(struct rpc_clnt *); -int rpc_destroy_client(struct rpc_clnt *); +void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); + int rpcb_register(u32, u32, int, unsigned short, int *); -void rpcb_getport(struct rpc_task *); +int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); +void rpcb_getport_async(struct rpc_task *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -132,20 +133,16 @@ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, + int flags); void rpc_restart_call(struct rpc_task *); void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); -int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -/* - * Helper function for NFSroot support - */ -int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int); - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 5eca9e44205..bbac101ac37 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -77,7 +77,7 @@ struct gss_api_mech { struct module *gm_owner; struct xdr_netobj gm_oid; char *gm_name; - struct gss_api_ops *gm_ops; + const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ int gm_pf_num; struct pf_desc * gm_pfs; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index ad293760f6e..51b977a4ca2 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -23,9 +23,11 @@ struct rpc_inode { void *private; struct list_head pipe; struct list_head in_upcall; + struct list_head in_downcall; int pipelen; int nreaders; int nwriters; + int nkern_readwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2047fb202a1..8ea077db009 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -98,7 +98,6 @@ struct rpc_task { unsigned short tk_pid; /* debugging aid */ #endif }; -#define tk_auth tk_client->cl_auth #define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ @@ -110,11 +109,6 @@ struct rpc_task { if (!list_empty(head) && \ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) -/* .. and walking list of all tasks */ -#define alltask_for_each(task, pos, head) \ - list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_task)),1) - typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index e21dd93ac4b..a53e0fa855d 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -59,6 +59,7 @@ struct svc_sock { /* cache of various info for TCP sockets */ void *sk_info_authunix; + struct sockaddr_storage sk_local; /* local address */ struct sockaddr_storage sk_remote; /* remote peer's address */ int sk_remotelen; /* length of address */ }; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 34f7590506f..d11cedd14f0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -17,6 +17,8 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/msg_prot.h> +#ifdef __KERNEL__ + extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -194,7 +196,13 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -#ifdef __KERNEL__ +struct rpc_xprtsock_create { + int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + struct sockaddr * srcaddr; /* optional local address */ + struct sockaddr * dstaddr; /* remote peer address */ + size_t addrlen; + struct rpc_timeout * timeout; /* optional timeout parameters */ +}; /* * Transport operations used by ULPs @@ -204,7 +212,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); +struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -242,8 +250,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); -struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); +struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); +struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); int init_socket_xprt(void); void cleanup_socket_xprt(void); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9527f2bb174..aa55d0a03e6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -13,17 +13,22 @@ #include <linux/errno.h> #include <linux/sunrpc/clnt.h> #include <linux/spinlock.h> +#include <linux/smp_lock.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { +static DEFINE_SPINLOCK(rpc_authflavor_lock); +static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ &authunix_ops, /* AUTH_UNIX */ NULL, /* others can be loadable modules */ }; +static LIST_HEAD(cred_unused); +static unsigned long number_cred_unused; + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -32,55 +37,67 @@ pseudoflavor_to_flavor(u32 flavor) { } int -rpcauth_register(struct rpc_authops *ops) +rpcauth_register(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != NULL) - return -EPERM; /* what else? */ - auth_flavors[flavor] = ops; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == NULL) { + auth_flavors[flavor] = ops; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } int -rpcauth_unregister(struct rpc_authops *ops) +rpcauth_unregister(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != ops) - return -EPERM; /* what else? */ - auth_flavors[flavor] = NULL; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == ops) { + auth_flavors[flavor] = NULL; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { struct rpc_auth *auth; - struct rpc_authops *ops; + const struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); auth = ERR_PTR(-EINVAL); if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; - /* FIXME - auth_flavors[] really needs an rw lock, - * and module refcounting. */ #ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); #endif - if ((ops = auth_flavors[flavor]) == NULL) + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); goto out; + } + spin_unlock(&rpc_authflavor_lock); auth = ops->create(clnt, pseudoflavor); + module_put(ops->owner); if (IS_ERR(auth)) return auth; if (clnt->cl_auth) - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = auth; out: @@ -88,7 +105,7 @@ out: } void -rpcauth_destroy(struct rpc_auth *auth) +rpcauth_release(struct rpc_auth *auth) { if (!atomic_dec_and_test(&auth->au_count)) return; @@ -97,11 +114,31 @@ rpcauth_destroy(struct rpc_auth *auth) static DEFINE_SPINLOCK(rpc_credcache_lock); +static void +rpcauth_unhash_cred_locked(struct rpc_cred *cred) +{ + hlist_del_rcu(&cred->cr_hash); + smp_mb__before_clear_bit(); + clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); +} + +static void +rpcauth_unhash_cred(struct rpc_cred *cred) +{ + spinlock_t *cache_lock; + + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) + rpcauth_unhash_cred_locked(cred); + spin_unlock(cache_lock); +} + /* * Initialize RPC credential cache */ int -rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) +rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; int i; @@ -111,8 +148,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) return -ENOMEM; for (i = 0; i < RPC_CREDCACHE_NR; i++) INIT_HLIST_HEAD(&new->hashtable[i]); - new->expire = expire; - new->nextgc = jiffies + (expire >> 1); + spin_lock_init(&new->lock); auth->au_credcache = new; return 0; } @@ -121,13 +157,13 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) * Destroy a list of credentials */ static inline -void rpcauth_destroy_credlist(struct hlist_head *head) +void rpcauth_destroy_credlist(struct list_head *head) { struct rpc_cred *cred; - while (!hlist_empty(head)) { - cred = hlist_entry(head->first, struct rpc_cred, cr_hash); - hlist_del_init(&cred->cr_hash); + while (!list_empty(head)) { + cred = list_entry(head->next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); put_rpccred(cred); } } @@ -137,58 +173,95 @@ void rpcauth_destroy_credlist(struct hlist_head *head) * that are not referenced. */ void -rpcauth_free_credcache(struct rpc_auth *auth) +rpcauth_clear_credcache(struct rpc_cred_cache *cache) { - struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; + LIST_HEAD(free); + struct hlist_head *head; struct rpc_cred *cred; int i; spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, &free); + head = &cache->hashtable[i]; + while (!hlist_empty(head)) { + cred = hlist_entry(head->first, struct rpc_cred, cr_hash); + get_rpccred(cred); + if (!list_empty(&cred->cr_lru)) { + list_del(&cred->cr_lru); + number_cred_unused--; + } + list_add_tail(&cred->cr_lru, &free); + rpcauth_unhash_cred_locked(cred); } } + spin_unlock(&cache->lock); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); } -static void -rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) +/* + * Destroy the RPC credential cache + */ +void +rpcauth_destroy_credcache(struct rpc_auth *auth) { - if (atomic_read(&cred->cr_count) != 1) - return; - if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, free); + struct rpc_cred_cache *cache = auth->au_credcache; + + if (cache) { + auth->au_credcache = NULL; + rpcauth_clear_credcache(cache); + kfree(cache); } } /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static void -rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) +static int +rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { - struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos, *next; - struct rpc_cred *cred; - int i; + spinlock_t *cache_lock; + struct rpc_cred *cred; - dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - rpcauth_prune_expired(auth, cred, free); + while (!list_empty(&cred_unused)) { + cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); + number_cred_unused--; + if (atomic_read(&cred->cr_count) != 0) + continue; + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) { + get_rpccred(cred); + list_add_tail(&cred->cr_lru, free); + rpcauth_unhash_cred_locked(cred); + nr_to_scan--; } + spin_unlock(cache_lock); + if (nr_to_scan == 0) + break; } - cache->nextgc = jiffies + cache->expire; + return nr_to_scan; +} + +/* + * Run memory cache shrinker. + */ +static int +rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +{ + LIST_HEAD(free); + int res; + + if (list_empty(&cred_unused)) + return 0; + spin_lock(&rpc_credcache_lock); + nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); + res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + return res; } /* @@ -198,53 +271,56 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int flags) { + LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; - struct rpc_cred *new = NULL, - *cred = NULL; + struct hlist_node *pos; + struct rpc_cred *cred = NULL, + *entry, *new; int nr = 0; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) nr = acred->uid & RPC_CREDCACHE_MASK; -retry: - spin_lock(&rpc_credcache_lock); - if (time_before(cache->nextgc, jiffies)) - rpcauth_gc_credcache(auth, &free); - hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { - struct rpc_cred *entry; - entry = hlist_entry(pos, struct rpc_cred, cr_hash); - if (entry->cr_ops->crmatch(acred, entry, flags)) { - hlist_del(&entry->cr_hash); - cred = entry; - break; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + spin_lock(&cache->lock); + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { + spin_unlock(&cache->lock); + continue; } - rpcauth_prune_expired(auth, entry, &free); + cred = get_rpccred(entry); + spin_unlock(&cache->lock); + break; } - if (new) { - if (cred) - hlist_add_head(&new->cr_hash, &free); - else - cred = new; - } - if (cred) { - hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); - get_rpccred(cred); - } - spin_unlock(&rpc_credcache_lock); + rcu_read_unlock(); - rpcauth_destroy_credlist(&free); + if (cred != NULL) + goto found; - if (!cred) { - new = auth->au_ops->crcreate(auth, acred, flags); - if (!IS_ERR(new)) { -#ifdef RPC_DEBUG - new->cr_magic = RPCAUTH_CRED_MAGIC; -#endif - goto retry; - } else - cred = new; - } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) + new = auth->au_ops->crcreate(auth, acred, flags); + if (IS_ERR(new)) { + cred = new; + goto out; + } + + spin_lock(&cache->lock); + hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + cred = get_rpccred(entry); + break; + } + if (cred == NULL) { + cred = new; + set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); + hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); + } else + list_add_tail(&new->cr_lru, &free); + spin_unlock(&cache->lock); +found: + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); @@ -253,8 +329,9 @@ retry: cred = ERR_PTR(res); } } - - return (struct rpc_cred *) cred; + rpcauth_destroy_credlist(&free); +out: + return cred; } struct rpc_cred * @@ -275,10 +352,27 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) return ret; } +void +rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, + struct rpc_auth *auth, const struct rpc_credops *ops) +{ + INIT_HLIST_NODE(&cred->cr_hash); + INIT_LIST_HEAD(&cred->cr_lru); + atomic_set(&cred->cr_count, 1); + cred->cr_auth = auth; + cred->cr_ops = ops; + cred->cr_expire = jiffies; +#ifdef RPC_DEBUG + cred->cr_magic = RPCAUTH_CRED_MAGIC; +#endif + cred->cr_uid = acred->uid; +} +EXPORT_SYMBOL(rpcauth_init_cred); + struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, @@ -288,7 +382,7 @@ rpcauth_bindcred(struct rpc_task *task) int flags = 0; dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_auth->au_ops->au_name); + task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); get_group_info(acred.group_info); if (task->tk_flags & RPC_TASK_ROOTCREDS) flags |= RPCAUTH_LOOKUP_ROOTCREDS; @@ -304,19 +398,42 @@ rpcauth_bindcred(struct rpc_task *task) void rpcauth_holdcred(struct rpc_task *task) { - dprintk("RPC: %5u holding %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, - task->tk_msg.rpc_cred); - if (task->tk_msg.rpc_cred) - get_rpccred(task->tk_msg.rpc_cred); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + if (cred != NULL) { + get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); + } } void put_rpccred(struct rpc_cred *cred) { - cred->cr_expire = jiffies; + /* Fast path for unhashed credentials */ + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + goto need_lock; + if (!atomic_dec_and_test(&cred->cr_count)) return; + goto out_destroy; +need_lock: + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + return; + if (!list_empty(&cred->cr_lru)) { + number_cred_unused--; + list_del_init(&cred->cr_lru); + } + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) + rpcauth_unhash_cred(cred); + else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + spin_unlock(&rpc_credcache_lock); + return; + } + spin_unlock(&rpc_credcache_lock); +out_destroy: cred->cr_ops->crdestroy(cred); } @@ -326,7 +443,7 @@ rpcauth_unbindcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); put_rpccred(cred); task->tk_msg.rpc_cred = NULL; @@ -338,7 +455,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crmarshal(task, p); } @@ -349,7 +466,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crvalidate(task, p); } @@ -359,13 +476,17 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret; dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. */ - return encode(rqstp, data, obj); + lock_kernel(); + ret = encode(rqstp, data, obj); + unlock_kernel(); + return ret; } int @@ -373,6 +494,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret; dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -380,7 +502,10 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); /* By default, we decode the arguments normally. */ - return decode(rqstp, data, obj); + lock_kernel(); + ret = decode(rqstp, data, obj); + unlock_kernel(); + return ret; } int @@ -390,7 +515,7 @@ rpcauth_refreshcred(struct rpc_task *task) int err; dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); if (err < 0) @@ -401,17 +526,34 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); - spin_lock(&rpc_credcache_lock); - if (task->tk_msg.rpc_cred) - task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - spin_unlock(&rpc_credcache_lock); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + if (cred) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } int rpcauth_uptodatecred(struct rpc_task *task) { - return !(task->tk_msg.rpc_cred) || - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + return cred == NULL || + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; +} + + +static struct shrinker *rpc_cred_shrinker; + +void __init rpcauth_init_module(void) +{ + rpc_init_authunix(); + rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker); +} + +void __exit rpcauth_remove_module(void) +{ + if (rpc_cred_shrinker != NULL) + remove_shrinker(rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4e4ccc5b6fe..baf4096d52d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -54,9 +54,10 @@ #include <linux/sunrpc/gss_api.h> #include <asm/uaccess.h> -static struct rpc_authops authgss_ops; +static const struct rpc_authops authgss_ops; -static struct rpc_credops gss_credops; +static const struct rpc_credops gss_credops; +static const struct rpc_credops gss_nullops; #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -64,7 +65,6 @@ static struct rpc_credops gss_credops; #define NFS_NGROUPS 16 -#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ @@ -79,19 +79,16 @@ static struct rpc_credops gss_credops; /* dump the buffer in `emacs-hexl' style */ #define isprint(c) ((c > 0x1f) && (c < 0x7f)) -static DEFINE_RWLOCK(gss_ctx_lock); - struct gss_auth { + struct kref kref; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; - struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - spinlock_t lock; }; -static void gss_destroy_ctx(struct gss_cl_ctx *); +static void gss_free_ctx(struct gss_cl_ctx *); static struct rpc_pipe_ops gss_upcall_ops; static inline struct gss_cl_ctx * @@ -105,20 +102,24 @@ static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { if (atomic_dec_and_test(&ctx->count)) - gss_destroy_ctx(ctx); + gss_free_ctx(ctx); } +/* gss_cred_set_ctx: + * called by gss_upcall_callback and gss_create_upcall in order + * to set the gss context. The actual exchange of an old context + * and a new one is protected by the inode->i_lock. + */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *old; - write_lock(&gss_ctx_lock); + old = gss_cred->gc_ctx; - gss_cred->gc_ctx = ctx; - cred->cr_flags |= RPCAUTH_CRED_UPTODATE; - cred->cr_flags &= ~RPCAUTH_CRED_NEW; - write_unlock(&gss_ctx_lock); + rcu_assign_pointer(gss_cred->gc_ctx, ctx); + set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); if (old) gss_put_ctx(old); } @@ -129,10 +130,10 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); int res = 0; - read_lock(&gss_ctx_lock); - if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) + rcu_read_lock(); + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) res = 1; - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return res; } @@ -171,10 +172,10 @@ gss_cred_get_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = NULL; - read_lock(&gss_ctx_lock); + rcu_read_lock(); if (gss_cred->gc_ctx) ctx = gss_get_ctx(gss_cred->gc_ctx); - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return ctx; } @@ -269,10 +270,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) +__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &gss_auth->upcalls, list) { + list_for_each_entry(pos, &rpci->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -290,24 +291,24 @@ __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) { + struct inode *inode = gss_auth->dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *old; - spin_lock(&gss_auth->lock); - old = __gss_find_upcall(gss_auth, gss_msg->uid); + spin_lock(&inode->i_lock); + old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &gss_auth->upcalls); + list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); return gss_msg; } static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - if (list_empty(&gss_msg->list)) - return; list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); @@ -318,10 +319,14 @@ static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct gss_auth *gss_auth = gss_msg->auth; + struct inode *inode = gss_auth->dentry->d_inode; - spin_lock(&gss_auth->lock); - __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + if (list_empty(&gss_msg->list)) + return; + spin_lock(&inode->i_lock); + if (!list_empty(&gss_msg->list)) + __gss_unhash_msg(gss_msg); + spin_unlock(&inode->i_lock); } static void @@ -330,16 +335,16 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; + struct inode *inode = gss_msg->auth->dentry->d_inode; - BUG_ON(gss_msg == NULL); + spin_lock(&inode->i_lock); if (gss_msg->ctx) gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); else task->tk_status = gss_msg->msg.errno; - spin_lock(&gss_msg->auth->lock); gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - spin_unlock(&gss_msg->auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); } @@ -386,11 +391,12 @@ static inline int gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; + struct inode *inode = gss_auth->dentry->d_inode; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -400,7 +406,7 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -411,7 +417,7 @@ gss_refresh_upcall(struct rpc_task *task) rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); } else err = gss_msg->msg.errno; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -422,6 +428,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct inode *inode = gss_auth->dentry->d_inode; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -435,12 +442,11 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) } for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { - spin_unlock(&gss_auth->lock); break; } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); if (signalled()) { err = -ERESTARTSYS; goto out_intr; @@ -451,6 +457,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); else err = gss_msg->msg.errno; + spin_unlock(&inode->i_lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -489,12 +496,11 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct rpc_clnt *clnt; - struct gss_auth *gss_auth; - struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; + struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; - int err = -EFBIG; + ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; @@ -503,7 +509,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(filp->f_path.dentry->d_inode)->private; + clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -519,43 +525,38 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ctx = gss_alloc_context(); if (ctx == NULL) goto err; - err = 0; - gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); - p = gss_fill_context(p, end, ctx, gss_auth->mech); + + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&inode->i_lock); + gss_msg = __gss_find_upcall(RPC_I(inode), uid); + if (gss_msg == NULL) { + spin_unlock(&inode->i_lock); + goto err_put_ctx; + } + list_del_init(&gss_msg->list); + spin_unlock(&inode->i_lock); + + p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); - if (err != -EACCES) - goto err_put_ctx; - } - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - if (gss_msg) { - if (err == 0 && gss_msg->ctx == NULL) - gss_msg->ctx = gss_get_ctx(ctx); - gss_msg->msg.errno = err; - __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); - gss_release_msg(gss_msg); - } else { - struct auth_cred acred = { .uid = uid }; - spin_unlock(&gss_auth->lock); - cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); - if (IS_ERR(cred)) { - err = PTR_ERR(cred); - goto err_put_ctx; - } - gss_cred_set_ctx(cred, gss_get_ctx(ctx)); + gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; + goto err_release_msg; } - gss_put_ctx(ctx); - kfree(buf); - dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); - return mlen; + gss_msg->ctx = gss_get_ctx(ctx); + err = mlen; + +err_release_msg: + spin_lock(&inode->i_lock); + __gss_unhash_msg(gss_msg); + spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: - dprintk("RPC: gss_pipe_downcall returning %d\n", err); + dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); return err; } @@ -563,27 +564,21 @@ static void gss_pipe_release(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_clnt *clnt; - struct rpc_auth *auth; - struct gss_auth *gss_auth; + struct gss_upcall_msg *gss_msg; - clnt = rpci->private; - auth = clnt->cl_auth; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - spin_lock(&gss_auth->lock); - while (!list_empty(&gss_auth->upcalls)) { - struct gss_upcall_msg *gss_msg; + spin_lock(&inode->i_lock); + while (!list_empty(&rpci->in_downcall)) { - gss_msg = list_entry(gss_auth->upcalls.next, + gss_msg = list_entry(rpci->in_downcall.next, struct gss_upcall_msg, list); gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -637,18 +632,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; - INIT_LIST_HEAD(&gss_auth->upcalls); - spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - - err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); - if (err) - goto err_put_mech; + kref_init(&gss_auth->kref); gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); @@ -657,7 +647,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } + err = rpcauth_init_credcache(auth); + if (err) + goto err_unlink_pipe; + return auth; +err_unlink_pipe: + rpc_unlink(gss_auth->dentry); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -668,6 +664,25 @@ out_dec: } static void +gss_free(struct gss_auth *gss_auth) +{ + rpc_unlink(gss_auth->dentry); + gss_auth->dentry = NULL; + gss_mech_put(gss_auth->mech); + + kfree(gss_auth); + module_put(THIS_MODULE); +} + +static void +gss_free_callback(struct kref *kref) +{ + struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref); + + gss_free(gss_auth); +} + +static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth; @@ -675,23 +690,51 @@ gss_destroy(struct rpc_auth *auth) dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + rpcauth_destroy_credcache(auth); + gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->dentry); - gss_auth->dentry = NULL; - gss_mech_put(gss_auth->mech); + kref_put(&gss_auth->kref, gss_free_callback); +} - rpcauth_free_credcache(auth); - kfree(gss_auth); - module_put(THIS_MODULE); +/* + * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call + * to the server with the GSS control procedure field set to + * RPC_GSS_PROC_DESTROY. This should normally cause the server to release + * all RPCSEC_GSS state associated with that context. + */ +static int +gss_destroying_context(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct rpc_task *task; + + if (gss_cred->gc_ctx == NULL || + gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY) + return 0; + + gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; + cred->cr_ops = &gss_nullops; + + /* Take a reference to ensure the cred will be destroyed either + * by the RPC call or by the put_rpccred() below */ + get_rpccred(cred); + + task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC); + if (!IS_ERR(task)) + rpc_put_task(task); + + put_rpccred(cred); + return 1; } -/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure +/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void -gss_destroy_ctx(struct gss_cl_ctx *ctx) +gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: gss_destroy_ctx\n"); + dprintk("RPC: gss_free_ctx\n"); if (ctx->gc_gss_ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); @@ -701,15 +744,46 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx) } static void -gss_destroy_cred(struct rpc_cred *rc) +gss_free_ctx_callback(struct rcu_head *head) { - struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); + struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu); + gss_do_free_ctx(ctx); +} - dprintk("RPC: gss_destroy_cred \n"); +static void +gss_free_ctx(struct gss_cl_ctx *ctx) +{ + call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); +} - if (cred->gc_ctx) - gss_put_ctx(cred->gc_ctx); - kfree(cred); +static void +gss_free_cred(struct gss_cred *gss_cred) +{ + dprintk("RPC: gss_free_cred %p\n", gss_cred); + kfree(gss_cred); +} + +static void +gss_free_cred_callback(struct rcu_head *head) +{ + struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu); + gss_free_cred(gss_cred); +} + +static void +gss_destroy_cred(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct gss_cl_ctx *ctx = gss_cred->gc_ctx; + + if (gss_destroying_context(cred)) + return; + rcu_assign_pointer(gss_cred->gc_ctx, NULL); + call_rcu(&cred->cr_rcu, gss_free_cred_callback); + if (ctx) + gss_put_ctx(ctx); + kref_put(&gss_auth->kref, gss_free_callback); } /* @@ -734,16 +808,14 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) goto out_err; - atomic_set(&cred->gc_count, 1); - cred->gc_uid = acred->uid; + rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ - cred->gc_flags = 0; - cred->gc_base.cr_ops = &gss_credops; - cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; + cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; + kref_get(&gss_auth->kref); return &cred->gc_base; out_err: @@ -774,7 +846,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) * we don't really care if the credential has expired or not, * since the caller should be prepared to reinitialise it. */ - if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW)) + if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) @@ -830,7 +902,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } else if (maj_stat != 0) { printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; @@ -855,6 +927,13 @@ gss_refresh(struct rpc_task *task) return 0; } +/* Dummy refresh routine: used only when destroying the context */ +static int +gss_refresh_null(struct rpc_task *task) +{ + return -EACCES; +} + static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { @@ -883,12 +962,15 @@ gss_validate(struct rpc_task *task, __be32 *p) maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (maj_stat) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (maj_stat) { + dprintk("RPC: %5u gss_validate: gss_verify_mic returned" + "error 0x%08x\n", task->tk_pid, maj_stat); goto out_bad; + } /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -917,7 +999,9 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); if (status) return status; @@ -937,7 +1021,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; q = xdr_encode_opaque(p, NULL, mic.len); @@ -1011,7 +1095,9 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); if (status) return status; @@ -1036,7 +1122,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; @@ -1070,12 +1156,16 @@ gss_wrap_req(struct rpc_task *task, /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, encode, @@ -1123,7 +1213,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; return 0; @@ -1148,7 +1238,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; if (ntohl(*(*p)++) != rqstp->rq_seqno) @@ -1188,10 +1278,12 @@ gss_unwrap_resp(struct rpc_task *task, break; } /* take into account extra slack for integrity and privacy cases: */ - task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: + lock_kernel(); status = decode(rqstp, p, obj); + unlock_kernel(); out: gss_put_ctx(ctx); dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, @@ -1199,7 +1291,7 @@ out: return status; } -static struct rpc_authops authgss_ops = { +static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, #ifdef RPC_DEBUG @@ -1211,7 +1303,7 @@ static struct rpc_authops authgss_ops = { .crcreate = gss_create_cred }; -static struct rpc_credops gss_credops = { +static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, @@ -1223,6 +1315,17 @@ static struct rpc_credops gss_credops = { .crunwrap_resp = gss_unwrap_resp, }; +static const struct rpc_credops gss_nullops = { + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, +}; + static struct rpc_pipe_ops gss_upcall_ops = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7b194321705..71b9daefdff 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -201,7 +201,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -static struct gss_api_ops gss_kerberos_ops = { +static const struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 7e15aa68ae6..577d590e755 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -202,7 +202,7 @@ gss_get_mic_spkm3(struct gss_ctx *ctx, return err; } -static struct gss_api_ops gss_spkm3_ops = { +static const struct gss_api_ops gss_spkm3_ops = { .gss_import_sec_context = gss_import_sec_context_spkm3, .gss_get_mic = gss_get_mic_spkm3, .gss_verify_mic = gss_verify_mic_spkm3, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 3df9fccab2f..537d0e8589d 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -76,7 +76,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -101,7 +101,7 @@ nul_validate(struct rpc_task *task, __be32 *p) return p; } -struct rpc_authops authnull_ops = { +const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, #ifdef RPC_DEBUG @@ -122,7 +122,7 @@ struct rpc_auth null_auth = { }; static -struct rpc_credops null_credops = { +const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, .crmatch = nul_match, @@ -133,9 +133,11 @@ struct rpc_credops null_credops = { static struct rpc_cred null_cred = { + .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru), + .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), - .cr_flags = RPCAUTH_CRED_UPTODATE, + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, #ifdef RPC_DEBUG .cr_magic = RPCAUTH_CRED_MAGIC, #endif diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4e7733aee36..5ed91e5bcee 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -20,11 +20,6 @@ struct unx_cred { gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid -#define uc_count uc_base.cr_count -#define uc_flags uc_base.cr_flags -#define uc_expire uc_base.cr_expire - -#define UNX_CRED_EXPIRE (60 * HZ) #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) @@ -34,15 +29,14 @@ struct unx_cred { static struct rpc_auth unix_auth; static struct rpc_cred_cache unix_cred_cache; -static struct rpc_credops unix_credops; +static const struct rpc_credops unix_credops; static struct rpc_auth * unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (atomic_inc_return(&unix_auth.au_count) == 0) - unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); + atomic_inc(&unix_auth.au_count); return &unix_auth; } @@ -50,7 +44,7 @@ static void unx_destroy(struct rpc_auth *auth) { dprintk("RPC: destroying UNIX authenticator %p\n", auth); - rpcauth_free_credcache(auth); + rpcauth_clear_credcache(auth->au_credcache); } /* @@ -74,8 +68,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) return ERR_PTR(-ENOMEM); - atomic_set(&cred->uc_count, 1); - cred->uc_flags = RPCAUTH_CRED_UPTODATE; + rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); + cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; @@ -85,22 +79,34 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } - cred->uc_base.cr_ops = &unix_credops; - return (struct rpc_cred *) cred; + return &cred->uc_base; +} + +static void +unx_free_cred(struct unx_cred *unx_cred) +{ + dprintk("RPC: unx_free_cred %p\n", unx_cred); + kfree(unx_cred); +} + +static void +unx_free_cred_callback(struct rcu_head *head) +{ + struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); + unx_free_cred(unx_cred); } static void unx_destroy_cred(struct rpc_cred *cred) { - kfree(cred); + call_rcu(&cred->cr_rcu, unx_free_cred_callback); } /* @@ -111,7 +117,7 @@ unx_destroy_cred(struct rpc_cred *cred) static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { - struct unx_cred *cred = (struct unx_cred *) rcred; + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); int i; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { @@ -142,7 +148,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; + struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; @@ -175,7 +181,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -198,13 +204,18 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_auth->au_rslack = (size >> 2) + 2; + task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; } -struct rpc_authops authunix_ops = { +void __init rpc_init_authunix(void) +{ + spin_lock_init(&unix_cred_cache.lock); +} + +const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, #ifdef RPC_DEBUG @@ -218,7 +229,6 @@ struct rpc_authops authunix_ops = { static struct rpc_cred_cache unix_cred_cache = { - .expire = UNX_CRED_EXPIRE, }; static @@ -232,7 +242,7 @@ struct rpc_auth unix_auth = { }; static -struct rpc_credops unix_credops = { +const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d8fbee40a19..52429b1ffcc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -44,6 +44,12 @@ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ __FUNCTION__, t->tk_status) +/* + * All RPC clients are linked into this list + */ +static LIST_HEAD(all_clients); +static DEFINE_SPINLOCK(rpc_client_lock); + static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -66,6 +72,21 @@ static void call_connect_status(struct rpc_task *task); static __be32 * call_header(struct rpc_task *task); static __be32 * call_verify(struct rpc_task *task); +static int rpc_ping(struct rpc_clnt *clnt, int flags); + +static void rpc_register_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_client_lock); +} + +static void rpc_unregister_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_del(&clnt->cl_clients); + spin_unlock(&rpc_client_lock); +} static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) @@ -111,6 +132,9 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); + err = rpciod_up(); + if (err) + goto out_no_rpciod; err = -EINVAL; if (!xprt) goto out_no_xprt; @@ -121,8 +145,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_users, 0); - atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; @@ -148,6 +170,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_metrics == NULL) goto out_no_stats; clnt->cl_program = program; + INIT_LIST_HEAD(&clnt->cl_tasks); + spin_lock_init(&clnt->cl_lock); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -155,6 +179,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + kref_init(&clnt->cl_kref); + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) goto out_no_path; @@ -172,6 +198,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + rpc_register_client(clnt); return clnt; out_no_auth: @@ -188,6 +215,8 @@ out_no_stats: out_err: xprt_put(xprt); out_no_xprt: + rpciod_down(); +out_no_rpciod: return ERR_PTR(err); } @@ -205,13 +234,32 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; + struct rpc_xprtsock_create xprtargs = { + .proto = args->protocol, + .srcaddr = args->saddress, + .dstaddr = args->address, + .addrlen = args->addrsize, + .timeout = args->timeout + }; + char servername[20]; - xprt = xprt_create_transport(args->protocol, args->address, - args->addrsize, args->timeout); + xprt = xprt_create_transport(&xprtargs); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; /* + * If the caller chooses not to specify a hostname, whip + * up a string representation of the passed-in address. + */ + if (args->servername == NULL) { + struct sockaddr_in *addr = + (struct sockaddr_in *) &args->address; + snprintf(servername, sizeof(servername), NIPQUAD_FMT, + NIPQUAD(addr->sin_addr.s_addr)); + args->servername = servername; + } + + /* * By default, kernel RPC client connects from a reserved port. * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, * but it is always enabled for rpciod, which handles the connect @@ -245,8 +293,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_intr = 1; if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_ONESHOT) - clnt->cl_oneshot = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; @@ -268,24 +314,25 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - atomic_set(&new->cl_count, 1); - atomic_set(&new->cl_users, 0); + new->cl_parent = clnt; + /* Turn off autobind on clones */ + new->cl_autobind = 0; + INIT_LIST_HEAD(&new->cl_tasks); + spin_lock_init(&new->cl_lock); + rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; + kref_init(&new->cl_kref); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; - new->cl_parent = clnt; - atomic_inc(&clnt->cl_count); - new->cl_xprt = xprt_get(clnt->cl_xprt); - /* Turn off autobind on clones */ - new->cl_autobind = 0; - new->cl_oneshot = 0; - new->cl_dead = 0; - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + xprt_get(clnt->cl_xprt); + kref_get(&clnt->cl_kref); + rpc_register_client(new); + rpciod_up(); return new; out_no_path: rpc_free_iostats(new->cl_metrics); @@ -298,86 +345,86 @@ out_no_clnt: /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot and - * cl_dead are cleared, or else the client would be destroyed - * when the last task releases it. + * requests. */ -int -rpc_shutdown_client(struct rpc_clnt *clnt) +void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_server, - atomic_read(&clnt->cl_users)); - - while (atomic_read(&clnt->cl_users) > 0) { - /* Don't let rpc_release_client destroy us */ - clnt->cl_oneshot = 0; - clnt->cl_dead = 0; + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); + + while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, - !atomic_read(&clnt->cl_users), 1*HZ); - } - - if (atomic_read(&clnt->cl_users) < 0) { - printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", - clnt, atomic_read(&clnt->cl_users)); -#ifdef RPC_DEBUG - rpc_show_tasks(); -#endif - BUG(); + list_empty(&clnt->cl_tasks), 1*HZ); } - return rpc_destroy_client(clnt); + rpc_release_client(clnt); } /* - * Delete an RPC client + * Free an RPC client */ -int -rpc_destroy_client(struct rpc_clnt *clnt) +static void +rpc_free_client(struct kref *kref) { - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - BUG_ON(atomic_read(&clnt->cl_users) != 0); + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { - rpcauth_destroy(clnt->cl_auth); - clnt->cl_auth = NULL; - } if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { - rpc_destroy_client(clnt->cl_parent); + rpc_release_client(clnt->cl_parent); goto out_free; } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); + rpciod_down(); kfree(clnt); - return 0; } /* - * Release an RPC client + * Free an RPC client + */ +static void +rpc_free_auth(struct kref *kref) +{ + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); + + if (clnt->cl_auth == NULL) { + rpc_free_client(kref); + return; + } + + /* + * Note: RPCSEC_GSS may need to send NULL RPC calls in order to + * release remaining GSS contexts. This mechanism ensures + * that it can do so safely. + */ + kref_init(kref); + rpcauth_release(clnt->cl_auth); + clnt->cl_auth = NULL; + kref_put(kref, rpc_free_client); +} + +/* + * Release reference to the RPC client */ void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, atomic_read(&clnt->cl_users)); + dprintk("RPC: rpc_release_client(%p)\n", clnt); - if (!atomic_dec_and_test(&clnt->cl_users)) - return; - wake_up(&destroy_wait); - if (clnt->cl_oneshot || clnt->cl_dead) - rpc_destroy_client(clnt); + if (list_empty(&clnt->cl_tasks)) + wake_up(&destroy_wait); + kref_put(&clnt->cl_kref, rpc_free_auth); } /** @@ -468,82 +515,96 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) rpc_restore_sigmask(oldset); } -/* - * New rpc_call implementation +static +struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, + struct rpc_message *msg, + int flags, + const struct rpc_call_ops *ops, + void *data) +{ + struct rpc_task *task, *ret; + sigset_t oldset; + + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) { + rpc_release_calldata(ops, data); + return ERR_PTR(-ENOMEM); + } + + /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ + rpc_task_sigmask(task, &oldset); + if (msg != NULL) { + rpc_call_setup(task, msg, 0); + if (task->tk_status != 0) { + ret = ERR_PTR(task->tk_status); + rpc_put_task(task); + goto out; + } + } + atomic_inc(&task->tk_count); + rpc_execute(task); + ret = task; +out: + rpc_restore_sigmask(&oldset); + return ret; +} + +/** + * rpc_call_sync - Perform a synchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags */ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { struct rpc_task *task; - sigset_t oldset; - int status; - - /* If this client is slain all further I/O fails */ - if (clnt->cl_dead) - return -EIO; + int status; BUG_ON(flags & RPC_TASK_ASYNC); - task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); - if (task == NULL) - return -ENOMEM; - - /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - - /* Set up the call info struct and execute the task */ - rpc_call_setup(task, msg, 0); - if (task->tk_status == 0) { - atomic_inc(&task->tk_count); - rpc_execute(task); - } + task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); status = task->tk_status; rpc_put_task(task); - rpc_restore_sigmask(&oldset); return status; } -/* - * New rpc_call implementation +/** + * rpc_call_async - Perform an asynchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags + * @ops: RPC call ops + * @data: user call data */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; - sigset_t oldset; - int status; - - /* If this client is slain all further I/O fails */ - status = -EIO; - if (clnt->cl_dead) - goto out_release; - - flags |= RPC_TASK_ASYNC; - - /* Create/initialize a new RPC task */ - status = -ENOMEM; - if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) - goto out_release; - - /* Mask signals on GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - rpc_call_setup(task, msg, 0); - - /* Set up the call info struct and execute the task */ - status = task->tk_status; - if (status == 0) - rpc_execute(task); - else - rpc_put_task(task); - - rpc_restore_sigmask(&oldset); - return status; -out_release: - rpc_release_calldata(tk_ops, data); - return status; + task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt: pointer to RPC client + * @flags: RPC flags + * @ops: RPC call ops + * @data: user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *tk_ops, + void *data) +{ + return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); +} +EXPORT_SYMBOL(rpc_run_task); void rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) @@ -745,7 +806,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_auth->au_cslack; + unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -843,10 +904,8 @@ call_encode(struct rpc_task *task) if (encode == NULL) return; - lock_kernel(); task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - unlock_kernel(); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ rpc_delay(task, 3*HZ); @@ -1177,10 +1236,8 @@ call_decode(struct rpc_task *task) task->tk_action = rpc_exit_task; if (decode) { - lock_kernel(); task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, task->tk_msg.rpc_resp); - unlock_kernel(); } dprintk("RPC: %5u call_decode result %d\n", task->tk_pid, task->tk_status); @@ -1273,9 +1330,9 @@ call_verify(struct rpc_task *task) * - if it isn't pointer subtraction in the NFS client may give * undefined results */ - printk(KERN_WARNING - "call_verify: XDR representation not a multiple of" - " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + dprintk("RPC: %5u %s: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_pid, __FUNCTION__, + task->tk_rqstp->rq_rcv_buf.len); goto out_eio; } if ((len -= 3) < 0) @@ -1283,7 +1340,8 @@ call_verify(struct rpc_task *task) p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { - printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); + dprintk("RPC: %5u %s: not an RPC reply: %x\n", + task->tk_pid, __FUNCTION__, n); goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { @@ -1334,7 +1392,8 @@ call_verify(struct rpc_task *task) "authentication.\n", task->tk_client->cl_server); break; default: - printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); + dprintk("RPC: %5u %s: unknown auth error: %x\n", + task->tk_pid, __FUNCTION__, n); error = -EIO; } dprintk("RPC: %5u %s: call rejected %d\n", @@ -1342,7 +1401,8 @@ call_verify(struct rpc_task *task) goto out_err; } if (!(p = rpcauth_checkverf(task, p))) { - printk(KERN_WARNING "call_verify: auth check failed\n"); + dprintk("RPC: %5u %s: auth check failed\n", + task->tk_pid, __FUNCTION__); goto out_garbage; /* bad verifier, retry */ } len = p - (__be32 *)iov->iov_base - 1; @@ -1381,7 +1441,8 @@ call_verify(struct rpc_task *task) task->tk_pid, __FUNCTION__); break; /* retry */ default: - printk(KERN_WARNING "call_verify: server accept status: %x\n", n); + dprintk("RPC: %5u %s: server accept status: %x\n", + task->tk_pid, __FUNCTION__, n); /* Also retry */ } @@ -1395,14 +1456,16 @@ out_garbage: out_retry: return ERR_PTR(-EAGAIN); } - printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); + dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, + __FUNCTION__, error); return ERR_PTR(error); out_overflow: - printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); + dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid, + __FUNCTION__); goto out_garbage; } @@ -1421,7 +1484,7 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt, int flags) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, @@ -1432,3 +1495,51 @@ int rpc_ping(struct rpc_clnt *clnt, int flags) put_rpccred(msg.rpc_cred); return err; } + +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + .rpc_cred = cred, + }; + return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); +} +EXPORT_SYMBOL(rpc_call_null); + +#ifdef RPC_DEBUG +void rpc_show_tasks(void) +{ + struct rpc_clnt *clnt; + struct rpc_task *t; + + spin_lock(&rpc_client_lock); + if (list_empty(&all_clients)) + goto out; + printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + "-rpcwait -action- ---ops--\n"); + list_for_each_entry(clnt, &all_clients, cl_clients) { + if (list_empty(&clnt->cl_tasks)) + continue; + spin_lock(&clnt->cl_lock); + list_for_each_entry(t, &clnt->cl_tasks, tk_task) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + + printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_flags, t->tk_status, + t->tk_client, + (t->tk_client ? t->tk_client->cl_prog : 0), + t->tk_rqstp, t->tk_timeout, + rpc_waitq, + t->tk_action, t->tk_ops); + } + spin_unlock(&clnt->cl_lock); + } +out: + spin_unlock(&rpc_client_lock); +} +#endif diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5887457dc93..e787b6a43ee 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -344,7 +344,7 @@ rpc_info_open(struct inode *inode, struct file *file) mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); m->private = clnt; } else { single_release(inode, file); @@ -448,6 +448,15 @@ void rpc_put_mount(void) simple_release_fs(&rpc_mount, &rpc_mount_count); } +static int rpc_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations rpc_dentry_operations = { + .d_delete = rpc_delete_dentry, +}; + static int rpc_lookup_parent(char *path, struct nameidata *nd) { @@ -506,7 +515,7 @@ rpc_get_inode(struct super_block *sb, int mode) * FIXME: This probably has races. */ static void -rpc_depopulate(struct dentry *parent) +rpc_depopulate(struct dentry *parent, int start, int eof) { struct inode *dir = parent->d_inode; struct list_head *pos, *next; @@ -518,6 +527,10 @@ repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { dentry = list_entry(pos, struct dentry, d_u.d_child); + if (!dentry->d_inode || + dentry->d_inode->i_ino < start || + dentry->d_inode->i_ino >= eof) + continue; spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); @@ -533,11 +546,11 @@ repeat: if (n) { do { dentry = dvec[--n]; - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); + if (S_ISREG(dentry->d_inode->i_mode)) simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + else if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(dir, dentry); + d_delete(dentry); dput(dentry); } while (n); goto repeat; @@ -560,6 +573,7 @@ rpc_populate(struct dentry *parent, dentry = d_alloc_name(parent, files[i].name); if (!dentry) goto out_bad; + dentry->d_op = &rpc_dentry_operations; mode = files[i].mode; inode = rpc_get_inode(dir->i_sb, mode); if (!inode) { @@ -607,21 +621,14 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int error; - - shrink_dcache_parent(dentry); - if (d_unhashed(dentry)) - return 0; - if ((error = simple_rmdir(dir, dentry)) != 0) - return error; - if (!error) { - inode_dir_notify(dir, DN_DELETE); - d_drop(dentry); - } - return 0; + error = simple_rmdir(dir, dentry); + if (!error) + d_delete(dentry); + return error; } static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len) +rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -630,7 +637,9 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len) dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; - if (dentry->d_inode) { + if (!dentry->d_inode) + dentry->d_op = &rpc_dentry_operations; + else if (exclusive) { dput(dentry); dentry = ERR_PTR(-EEXIST); goto out_err; @@ -649,7 +658,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); - dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1); if (IS_ERR(dentry)) rpc_release_path(nd); return dentry; @@ -681,7 +690,7 @@ out: rpc_release_path(&nd); return dentry; err_depopulate: - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); __rpc_rmdir(dir, dentry); err_dput: dput(dentry); @@ -701,7 +710,7 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); error = __rpc_rmdir(dir, dentry); dput(dentry); mutex_unlock(&dir->i_mutex); @@ -716,10 +725,21 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_create(parent, name, strlen(name)); + dentry = rpc_lookup_create(parent, name, strlen(name), 0); if (IS_ERR(dentry)) return dentry; dir = parent->d_inode; + if (dentry->d_inode) { + rpci = RPC_I(dentry->d_inode); + if (rpci->private != private || + rpci->ops != ops || + rpci->flags != flags) { + dput (dentry); + dentry = ERR_PTR(-EBUSY); + } + rpci->nkern_readwriters++; + goto out; + } inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; @@ -730,6 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->private = private; rpci->flags = flags; rpci->ops = ops; + rpci->nkern_readwriters = 1; inode_dir_notify(dir, DN_CREATE); dget(dentry); out: @@ -754,13 +775,11 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (!d_unhashed(dentry)) { - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + if (!error) + d_delete(dentry); } dput(dentry); mutex_unlock(&dir->i_mutex); @@ -833,6 +852,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) rpci->nreaders = 0; rpci->nwriters = 0; INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->in_downcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6c7aa8a1f0c..d1740dbab99 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -12,6 +12,8 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ +#include <linux/module.h> + #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> @@ -184,8 +186,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_INTR), }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); @@ -238,6 +240,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -246,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -#ifdef CONFIG_ROOT_NFS /** - * rpcb_getport_external - obtain the port for an RPC service on a given host + * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to use to make this request * * Called from outside the RPC client in a synchronous task context. + * Uses default timeout parameters specified by underlying transport. * - * For now, this supports only version 2 queries, but is used only by - * mount_clnt for NFS_ROOT. + * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 */ -int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, - __u32 vers, int prot) +int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) { struct rpcbind_args map = { .r_prog = prog, @@ -277,15 +279,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, char hostname[40]; int status; - dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", + __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); status = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (status >= 0) { if (map.r_port != 0) @@ -294,16 +297,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, } return status; } -#endif +EXPORT_SYMBOL_GPL(rpcb_getport_sync); /** - * rpcb_getport - obtain the port for a given RPC service on a given host + * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request * * This one can be called for an ongoing RPC request, and can be used in * an async (rpciod) context. */ -void rpcb_getport(struct rpc_task *task) +void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; int bind_version; @@ -314,17 +317,17 @@ void rpcb_getport(struct rpc_task *task) struct sockaddr addr; int status; - dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n", - task->tk_pid, clnt->cl_server, - clnt->cl_prog, clnt->cl_vers, xprt->prot); + dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", + task->tk_pid, __FUNCTION__, + clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { status = -EACCES; /* tell caller to check again */ - dprintk("RPC: %5u rpcb_getport waiting for another binder\n", - task->tk_pid); + dprintk("RPC: %5u %s: waiting for another binder\n", + task->tk_pid, __FUNCTION__); goto bailout_nowake; } @@ -335,27 +338,28 @@ void rpcb_getport(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid); + dprintk("RPC: %5u %s: already bound\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EACCES; /* tell caller to try again later */ - dprintk("RPC: %5u rpcb_getport no more getport versions " - "available\n", task->tk_pid); + dprintk("RPC: %5u %s: no more getport versions available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; - dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n", - task->tk_pid, bind_version); + dprintk("RPC: %5u %s: trying rpcbind version %u\n", + task->tk_pid, __FUNCTION__, bind_version); map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u rpcb_getport no memory available\n", - task->tk_pid); + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } map->r_prog = clnt->cl_prog; @@ -373,16 +377,17 @@ void rpcb_getport(struct rpc_task *task) rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n", - task->tk_pid, PTR_ERR(rpcb_clnt)); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); goto bailout; } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; - dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", - task->tk_pid); + dprintk("RPC: %5u %s: rpc_run_task failed\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } rpc_put_task(child); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 944d75396fb..2ac43c41c3a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -25,7 +25,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa -static int rpc_task_id; #endif /* @@ -40,7 +39,6 @@ static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); -static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -50,23 +48,13 @@ static void rpc_release_task(struct rpc_task *task); static RPC_WAITQ(delay_queue, "delayq"); /* - * All RPC tasks are linked into this list - */ -static LIST_HEAD(all_tasks); - -/* * rpciod-related stuff */ static DEFINE_MUTEX(rpciod_mutex); -static unsigned int rpciod_users; +static atomic_t rpciod_users = ATOMIC_INIT(0); struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for other critical sections of code. - */ -static DEFINE_SPINLOCK(rpc_sched_lock); - -/* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). @@ -267,18 +255,33 @@ static int rpc_wait_bit_interruptible(void *word) return 0; } +#ifdef RPC_DEBUG +static void rpc_task_set_debuginfo(struct rpc_task *task) +{ + static atomic_t rpc_pid; + + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = atomic_inc_return(&rpc_pid); +} +#else +static inline void rpc_task_set_debuginfo(struct rpc_task *task) +{ +} +#endif + static void rpc_set_active(struct rpc_task *task) { + struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; - spin_lock(&rpc_sched_lock); -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif + rpc_task_set_debuginfo(task); /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); + clnt = task->tk_client; + if (clnt != NULL) { + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } } /* @@ -818,6 +821,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons if (tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; task->tk_calldata = calldata; + INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -830,7 +834,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_workqueue = rpciod_workqueue; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) @@ -860,9 +864,7 @@ static void rpc_free_task(struct rcu_head *rcu) } /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { @@ -870,7 +872,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task = rpc_alloc_task(); if (!task) - goto cleanup; + goto out; rpc_init_task(task, clnt, flags, tk_ops, calldata); @@ -878,16 +880,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; - -cleanup: - /* Check whether to release the client */ - if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... */ - rpc_release_client(clnt); - } - goto out; } @@ -920,11 +912,13 @@ static void rpc_release_task(struct rpc_task *task) #endif dprintk("RPC: %5u release task\n", task->tk_pid); - /* Remove from global task list */ - spin_lock(&rpc_sched_lock); - list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); - + if (!list_empty(&task->tk_task)) { + struct rpc_clnt *clnt = task->tk_client; + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + } BUG_ON (RPC_IS_QUEUED(task)); /* Synchronously delete any running timer */ @@ -939,29 +933,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, - void *data) -{ - struct rpc_task *task; - task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); - } - atomic_inc(&task->tk_count); - rpc_execute(task); - return task; -} -EXPORT_SYMBOL(rpc_run_task); - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -969,44 +940,25 @@ EXPORT_SYMBOL(rpc_run_task); void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; - struct list_head *le; - dprintk("RPC: killing all tasks for client %p\n", clnt); + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); /* * Spin lock all_tasks to prevent changes... */ - spin_lock(&rpc_sched_lock); - alltask_for_each(rovr, le, &all_tasks) { + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { if (! RPC_IS_ACTIVATED(rovr)) continue; - if (!clnt || rovr->tk_client == clnt) { + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); rpc_wake_up_task(rovr); } } - spin_unlock(&rpc_sched_lock); -} - -static void rpciod_killall(void) -{ - unsigned long flags; - - while (!list_empty(&all_tasks)) { - clear_thread_flag(TIF_SIGPENDING); - rpc_killall_tasks(NULL); - flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_tasks)) { - dprintk("RPC: rpciod_killall: waiting for tasks " - "to exit\n"); - yield(); - } - } - - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + spin_unlock(&clnt->cl_lock); } /* @@ -1018,28 +970,27 @@ rpciod_up(void) struct workqueue_struct *wq; int error = 0; + if (atomic_inc_not_zero(&rpciod_users)) + return 0; + mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_up: users %u\n", rpciod_users); - rpciod_users++; - if (rpciod_workqueue) - goto out; - /* - * If there's no pid, we should be the first user. - */ - if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); + + /* Guard against races with rpciod_down() */ + if (rpciod_workqueue != NULL) + goto out_ok; /* * Create the rpciod thread and wait for it to start. */ + dprintk("RPC: creating workqueue rpciod\n"); error = -ENOMEM; wq = create_workqueue("rpciod"); - if (wq == NULL) { - printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); - rpciod_users--; + if (wq == NULL) goto out; - } + rpciod_workqueue = wq; error = 0; +out_ok: + atomic_inc(&rpciod_users); out: mutex_unlock(&rpciod_mutex); return error; @@ -1048,59 +999,19 @@ out: void rpciod_down(void) { + if (!atomic_dec_and_test(&rpciod_users)) + return; + mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_down sema %u\n", rpciod_users); - if (rpciod_users) { - if (--rpciod_users) - goto out; - } else - printk(KERN_WARNING "rpciod_down: no users??\n"); + dprintk("RPC: destroying workqueue rpciod\n"); - if (!rpciod_workqueue) { - dprintk("RPC: rpciod_down: Nothing to do!\n"); - goto out; + if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) { + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; } - rpciod_killall(); - - destroy_workqueue(rpciod_workqueue); - rpciod_workqueue = NULL; - out: mutex_unlock(&rpciod_mutex); } -#ifdef RPC_DEBUG -void rpc_show_tasks(void) -{ - struct list_head *le; - struct rpc_task *t; - - spin_lock(&rpc_sched_lock); - if (list_empty(&all_tasks)) { - spin_unlock(&rpc_sched_lock); - return; - } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); - alltask_for_each(t, le, &all_tasks) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); - } - spin_unlock(&rpc_sched_lock); -} -#endif - void rpc_destroy_mempool(void) { diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 73075dec83c..384c4ad5ab8 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -28,15 +28,11 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpciod_down); -EXPORT_SYMBOL(rpciod_up); -EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); -EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); @@ -61,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_unregister); EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); -EXPORT_SYMBOL(rpcauth_free_credcache); +EXPORT_SYMBOL(rpcauth_destroy_credcache); EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); @@ -156,6 +152,7 @@ init_sunrpc(void) cache_register(&ip_map_cache); cache_register(&unix_gid_cache); init_socket_xprt(); + rpcauth_init_module(); out: return err; } @@ -163,6 +160,7 @@ out: static void __exit cleanup_sunrpc(void) { + rpcauth_remove_module(); cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5baf48de255..64b9b8c743c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -644,6 +644,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; + struct sockaddr *sin; int len; len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, @@ -654,6 +655,19 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); rqstp->rq_addrlen = svsk->sk_remotelen; + /* Destination address in request is needed for binding the + * source address in RPC callbacks later. + */ + sin = (struct sockaddr *)&svsk->sk_local; + switch (sin->sa_family) { + case AF_INET: + rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; + break; + case AF_INET6: + rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; + break; + } + dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", svsk, iov[0].iov_base, iov[0].iov_len, len); @@ -1064,6 +1078,12 @@ svc_tcp_accept(struct svc_sock *svsk) goto failed; memcpy(&newsvsk->sk_remote, sin, slen); newsvsk->sk_remotelen = slen; + err = kernel_getsockname(newsock, sin, &slen); + if (unlikely(err < 0)) { + dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + slen = offsetof(struct sockaddr, sa_data); + } + memcpy(&newsvsk->sk_local, sin, slen); svc_sock_received(newsvsk); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5b05b73e4c1..c8c2edccad7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); } /* @@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data) if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); @@ -886,27 +886,24 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i /** * xprt_create_transport - create an RPC transport - * @proto: requested transport protocol - * @ap: remote peer address - * @size: length of address - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) +struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; - switch (proto) { + switch (args->proto) { case IPPROTO_UDP: - xprt = xs_setup_udp(ap, size, to); + xprt = xs_setup_udp(args); break; case IPPROTO_TCP: - xprt = xs_setup_tcp(ap, size, to); + xprt = xs_setup_tcp(args); break; default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - proto); + args->proto); return ERR_PTR(-EIO); } if (IS_ERR(xprt)) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cc33c5880ab..4ae7eed7f61 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -235,6 +235,7 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; + struct sockaddr_storage addr; unsigned short port; /* @@ -653,8 +654,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&transport->connect_worker); - flush_scheduled_work(); + cancel_rearming_delayed_work(&transport->connect_worker); xprt_disconnect(xprt); xs_close(xprt); @@ -1001,7 +1001,7 @@ static void xs_tcp_state_change(struct sock *sk) /* Try to schedule an autoclose RPC calls */ set_bit(XPRT_CLOSE_WAIT, &xprt->state); if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); default: xprt_disconnect(xprt); } @@ -1146,31 +1146,36 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) sap->sin_port = htons(port); } -static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; + struct sockaddr_in *sa; int err; unsigned short port = transport->port; + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in *)&transport->addr; + myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); + if (!transport->xprt.resvport) + break; if (err == 0) { transport->port = port; - dprintk("RPC: xs_bindresvport bound to port %u\n", - port); - return 0; + break; } if (port <= xprt_min_resvport) port = xprt_max_resvport; else port--; } while (err == -EADDRINUSE && port != transport->port); - - dprintk("RPC: can't bind to reserved port (%d).\n", -err); + dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", + NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); return err; } @@ -1229,7 +1234,7 @@ static void xs_udp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1316,7 +1321,7 @@ static void xs_tcp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1410,18 +1415,16 @@ static void xs_connect(struct rpc_task *task) dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&transport->connect_worker, - xprt->reestablish_timeout); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, + xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_delayed_work(&transport->connect_worker, 0); - - /* flush_scheduled_work can sleep... */ - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, 0); } } @@ -1476,7 +1479,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1493,7 +1496,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1505,12 +1508,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; - if (addrlen > sizeof(xprt->addr)) { + if (args->addrlen > sizeof(xprt->addr)) { dprintk("RPC: xs_setup_xprt: address too large\n"); return ERR_PTR(-EBADF); } @@ -1532,8 +1535,10 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns return ERR_PTR(-ENOMEM); } - memcpy(&xprt->addr, addr, addrlen); - xprt->addrlen = addrlen; + memcpy(&xprt->addr, args->dstaddr, args->addrlen); + xprt->addrlen = args->addrlen; + if (args->srcaddr) + memcpy(&new->addr, args->srcaddr, args->addrlen); new->port = xs_get_random_port(); return xprt; @@ -1541,22 +1546,20 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns /** * xs_setup_udp - Set up transport to use a UDP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_UDP; @@ -1572,8 +1575,8 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_udp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); @@ -1586,22 +1589,20 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ /** * xs_setup_tcp - Set up transport to use a TCP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_TCP; @@ -1616,8 +1617,8 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_tcp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); |