From f9bdad8ca8a40270576dd8751ac225febaa87f93 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 29 Oct 2018 13:23:40 -0400 Subject: NFS NFSD: defining nl4_servers structure needed by both These structures are needed by COPY_NOTIFY on the client and needed by the nfsd as well Reviewed-by: Jeff Layton Signed-off-by: Olga Kornievskaia --- include/linux/nfs4.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index fd59904a282c..5810e248c1bd 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -16,6 +16,7 @@ #include #include #include +#include enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, @@ -674,4 +675,27 @@ struct nfs4_op_map { } u; }; +struct nfs42_netaddr { + char netid[RPCBIND_MAXNETIDLEN]; + char addr[RPCBIND_MAXUADDRLEN + 1]; + u32 netid_len; + u32 addr_len; +}; + +enum netloc_type4 { + NL4_NAME = 1, + NL4_URL = 2, + NL4_NETADDR = 3, +}; + +struct nl4_server { + enum netloc_type4 nl4_type; + union { + struct { /* NL4_NAME, NL4_URL */ + int nl4_str_sz; + char nl4_str[NFS4_OPAQUE_LIMIT + 1]; + }; + struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ + } u; +}; #endif -- cgit v1.2.3 From 0491567b51efeca807da1125a1a0d5193875e286 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 16:14:30 -0400 Subject: NFS: add COPY_NOTIFY operation Try using the delegation stateid, then the open stateid. Only NL4_NETATTR, No support for NL4_NAME and NL4_URL. Allow only one source server address to be returned for now. To distinguish between same server copy offload ("intra") and a copy between different server ("inter"), do a check of server owner identity and also make sure server is capable of doing a copy offload. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- fs/nfs/nfs42.h | 12 ++++ fs/nfs/nfs42proc.c | 91 ++++++++++++++++++++++++ fs/nfs/nfs42xdr.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 2 + fs/nfs/nfs4client.c | 2 +- fs/nfs/nfs4file.c | 20 +++++- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 16 +++++ 11 files changed, 323 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 901cca7542f9..4995731a6714 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -13,6 +13,7 @@ #define PNFS_LAYOUTSTATS_MAXDEV (4) /* nfs4.2proc.c */ +#ifdef CONFIG_NFS_V4_2 int nfs42_proc_allocate(struct file *, loff_t, loff_t); ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); @@ -23,5 +24,16 @@ int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, const struct nfs42_layout_error *errors, size_t n); +int nfs42_proc_copy_notify(struct file *, struct file *, + struct nfs42_copy_notify_res *); +static inline bool nfs42_files_from_same_server(struct file *in, + struct file *out) +{ + struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client; + struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client; + return nfs4_check_serverowner_major_id(c_in->cl_serverowner, + c_out->cl_serverowner); +} +#endif /* CONFIG_NFS_V4_2 */ #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 5196bfa7894d..6317dd89cf43 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -3,6 +3,7 @@ * Copyright (c) 2014 Anna Schumaker */ #include +#include #include #include #include @@ -15,10 +16,30 @@ #include "pnfs.h" #include "nfs4session.h" #include "internal.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); +static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) +{ + struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client; + unsigned short port = 2049; + + rcu_read_lock(); + naddr->netid_len = scnprintf(naddr->netid, + sizeof(naddr->netid), "%s", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_NETID)); + naddr->addr_len = scnprintf(naddr->addr, + sizeof(naddr->addr), + "%s.%u.%u", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + port >> 8, port & 255); + rcu_read_unlock(); +} + static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, struct nfs_lock_context *lock, loff_t offset, loff_t len) { @@ -459,6 +480,76 @@ static int nfs42_do_offload_cancel_async(struct file *dst, return status; } +int _nfs42_proc_copy_notify(struct file *src, struct file *dst, + struct nfs42_copy_notify_args *args, + struct nfs42_copy_notify_res *res) +{ + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY], + .rpc_argp = args, + .rpc_resp = res, + }; + int status; + struct nfs_open_context *ctx; + struct nfs_lock_context *l_ctx; + + ctx = get_nfs_open_context(nfs_file_open_context(src)); + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + return PTR_ERR(l_ctx); + + status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, + FMODE_READ); + nfs_put_lock_context(l_ctx); + if (status) + return status; + + status = nfs4_call_sync(src_server->client, src_server, &msg, + &args->cna_seq_args, &res->cnr_seq_res, 0); + if (status == -ENOTSUPP) + src_server->caps &= ~NFS_CAP_COPY_NOTIFY; + + put_nfs_open_context(nfs_file_open_context(src)); + return status; +} + +int nfs42_proc_copy_notify(struct file *src, struct file *dst, + struct nfs42_copy_notify_res *res) +{ + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); + struct nfs42_copy_notify_args *args; + struct nfs4_exception exception = { + .inode = file_inode(src), + }; + int status; + + if (!(src_server->caps & NFS_CAP_COPY_NOTIFY)) + return -EOPNOTSUPP; + + args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS); + if (args == NULL) + return -ENOMEM; + + args->cna_src_fh = NFS_FH(file_inode(src)), + args->cna_dst.nl4_type = NL4_NETADDR; + nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr); + exception.stateid = &args->cna_src_stateid; + + do { + status = _nfs42_proc_copy_notify(src, dst, args, res); + if (status == -ENOTSUPP) { + status = -EOPNOTSUPP; + goto out; + } + status = nfs4_handle_exception(src_server, status, &exception); + } while (exception.retry); + +out: + kfree(args); + return status; +} + static loff_t _nfs42_proc_llseek(struct file *filep, struct nfs_lock_context *lock, loff_t offset, int whence) { diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index aed865a84629..ccabc0cd93dd 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -29,6 +29,16 @@ #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) +#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) +#define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \ + 3 + /* cnr_lease_time */\ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + 1 + /* Support 1 cnr_source_server */\ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) @@ -99,6 +109,12 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) +#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_copy_notify_maxsz) +#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_copy_notify_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -166,6 +182,26 @@ static void encode_allocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_nl4_server(struct xdr_stream *xdr, + const struct nl4_server *ns) +{ + encode_uint32(xdr, ns->nl4_type); + switch (ns->nl4_type) { + case NL4_NAME: + case NL4_URL: + encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str); + break; + case NL4_NETADDR: + encode_string(xdr, ns->u.nl4_addr.netid_len, + ns->u.nl4_addr.netid); + encode_string(xdr, ns->u.nl4_addr.addr_len, + ns->u.nl4_addr.addr); + break; + default: + WARN_ON_ONCE(1); + } +} + static void encode_copy(struct xdr_stream *xdr, const struct nfs42_copy_args *args, struct compound_hdr *hdr) @@ -191,6 +227,15 @@ static void encode_offload_cancel(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->osa_stateid); } +static void encode_copy_notify(struct xdr_stream *xdr, + const struct nfs42_copy_notify_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->cna_src_stateid); + encode_nl4_server(xdr, &args->cna_dst); +} + static void encode_deallocate(struct xdr_stream *xdr, const struct nfs42_falloc_args *args, struct compound_hdr *hdr) @@ -354,6 +399,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode COPY_NOTIFY request + */ +static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_copy_notify_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->cna_seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->cna_seq_args, &hdr); + encode_putfh(xdr, args->cna_src_fh, &hdr); + encode_copy_notify(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode DEALLOCATE request */ @@ -490,6 +554,58 @@ static int decode_write_response(struct xdr_stream *xdr, return decode_verifier(xdr, &res->verifier.verifier); } +static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns) +{ + struct nfs42_netaddr *naddr; + uint32_t dummy; + char *dummy_str; + __be32 *p; + int status; + + /* nl_type */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + ns->nl4_type = be32_to_cpup(p); + switch (ns->nl4_type) { + case NL4_NAME: + case NL4_URL: + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(&ns->u.nl4_str, dummy_str, dummy); + ns->u.nl4_str_sz = dummy; + break; + case NL4_NETADDR: + naddr = &ns->u.nl4_addr; + + /* netid string */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > RPCBIND_MAXNETIDLEN)) + return -EIO; + naddr->netid_len = dummy; + memcpy(naddr->netid, dummy_str, naddr->netid_len); + + /* uaddr string */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > RPCBIND_MAXUADDRLEN)) + return -EIO; + naddr->addr_len = dummy; + memcpy(naddr->addr, dummy_str, naddr->addr_len); + break; + default: + WARN_ON_ONCE(1); + return -EIO; + } + return 0; +} + static int decode_copy_requirements(struct xdr_stream *xdr, struct nfs42_copy_res *res) { __be32 *p; @@ -529,6 +645,42 @@ static int decode_offload_cancel(struct xdr_stream *xdr, return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); } +static int decode_copy_notify(struct xdr_stream *xdr, + struct nfs42_copy_notify_res *res) +{ + __be32 *p; + int status, count; + + status = decode_op_hdr(xdr, OP_COPY_NOTIFY); + if (status) + return status; + /* cnr_lease_time */ + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + return -EIO; + p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds); + res->cnr_lease_time.nseconds = be32_to_cpup(p); + + status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE); + if (unlikely(status)) + return -EIO; + + /* number of source addresses */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + count = be32_to_cpup(p); + if (count > 1) + pr_warn("NFS: %s: nsvr %d > Supported. Use first servers\n", + __func__, count); + + status = decode_nl4_server(xdr, &res->cnr_src); + if (unlikely(status)) + return -EIO; + return 0; +} + static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_DEALLOCATE); @@ -656,6 +808,32 @@ out: return status; } +/* + * Decode COPY_NOTIFY response + */ +static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_copy_notify_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->cnr_seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_copy_notify(xdr, res); + +out: + return status; +} + /* * Decode DEALLOCATE request */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 16b2e5cc3e94..8e590b424d75 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -457,6 +457,8 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, const struct cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_notify_server(struct nfs_client *); +bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index da6204025a2d..54aaf553d009 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -629,7 +629,7 @@ out: /* * Returns true if the server major ids match */ -static bool +bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, struct nfs41_server_owner *o2) { diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 339663d04bf8..686a6c4071e3 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,6 +133,9 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + struct nfs42_copy_notify_res *cn_resp = NULL; + ssize_t ret; + /* Only offload copy if superblock is the same */ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; @@ -140,7 +143,22 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; - return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); + if (!nfs42_files_from_same_server(file_in, file_out)) { + cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), + GFP_NOFS); + if (unlikely(cn_resp == NULL)) + return -ENOMEM; + + ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp); + if (ret) { + ret = -EOPNOTSUPP; + goto out; + } + } + ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); +out: + kfree(cn_resp); + return ret; } static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 11eafcfc490b..505045b47670 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9899,6 +9899,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_COPY | NFS_CAP_OFFLOAD_CANCEL + | NFS_CAP_COPY_NOTIFY | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ab07db0f07cd..2f9315de3d7d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7581,6 +7581,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(CLONE, enc_clone, dec_clone), PROC42(COPY, enc_copy, dec_copy), PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), + PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5810e248c1bd..5e7a5261af4e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,6 +537,7 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, + NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..e1c8748e1e82 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -276,5 +276,6 @@ struct nfs_server { #define NFS_CAP_COPY (1U << 24) #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) +#define NFS_CAP_COPY_NOTIFY (1U << 27) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..0a7af40026d7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1463,6 +1463,22 @@ struct nfs42_offload_status_res { int osr_status; }; +struct nfs42_copy_notify_args { + struct nfs4_sequence_args cna_seq_args; + + struct nfs_fh *cna_src_fh; + nfs4_stateid cna_src_stateid; + struct nl4_server cna_dst; +}; + +struct nfs42_copy_notify_res { + struct nfs4_sequence_res cnr_seq_res; + + struct nfstime4 cnr_lease_time; + nfs4_stateid cnr_stateid; + struct nl4_server cnr_src; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; -- cgit v1.2.3 From 1d38f3f0d70008671f4dc055697ff3c3bb44a284 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 11:54:18 -0400 Subject: NFS: add ca_source_server<> to COPY Support only one source server address: the same address that the client and source server use. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- fs/nfs/nfs42.h | 3 ++- fs/nfs/nfs42proc.c | 26 +++++++++++++++++--------- fs/nfs/nfs42xdr.c | 12 ++++++++++-- fs/nfs/nfs4file.c | 7 ++++++- include/linux/nfs_xdr.h | 1 + 5 files changed, 36 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 4995731a6714..02e3810cd889 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -15,7 +15,8 @@ /* nfs4.2proc.c */ #ifdef CONFIG_NFS_V4_2 int nfs42_proc_allocate(struct file *, loff_t, loff_t); -ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t); +ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t, + struct nl4_server *, nfs4_stateid *); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6317dd89cf43..e34ade844737 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -243,7 +243,9 @@ static ssize_t _nfs42_proc_copy(struct file *src, struct file *dst, struct nfs_lock_context *dst_lock, struct nfs42_copy_args *args, - struct nfs42_copy_res *res) + struct nfs42_copy_res *res, + struct nl4_server *nss, + nfs4_stateid *cnr_stateid) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], @@ -257,11 +259,15 @@ static ssize_t _nfs42_proc_copy(struct file *src, size_t count = args->count; ssize_t status; - status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, - src_lock, FMODE_READ); - if (status) - return status; - + if (nss) { + args->cp_src = nss; + nfs4_stateid_copy(&args->src_stateid, cnr_stateid); + } else { + status = nfs4_set_rw_stateid(&args->src_stateid, + src_lock->open_context, src_lock, FMODE_READ); + if (status) + return status; + } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); if (status) @@ -325,8 +331,9 @@ out: } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, - struct file *dst, loff_t pos_dst, - size_t count) + struct file *dst, loff_t pos_dst, size_t count, + struct nl4_server *nss, + nfs4_stateid *cnr_stateid) { struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; @@ -368,7 +375,8 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, inode_lock(file_inode(dst)); err = _nfs42_proc_copy(src, src_lock, dst, dst_lock, - &args, &res); + &args, &res, + nss, cnr_stateid); inode_unlock(file_inode(dst)); if (err >= 0) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index ccabc0cd93dd..c03f3246d6c5 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -21,7 +21,10 @@ #define encode_copy_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ - 2 + 2 + 2 + 1 + 1 + 1) + 2 + 2 + 2 + 1 + 1 + 1 +\ + 1 + /* One cnr_source_server */\ + 1 + /* nl4_type */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) #define decode_copy_maxsz (op_decode_hdr_maxsz + \ NFS42_WRITE_RES_SIZE + \ 1 /* cr_consecutive */ + \ @@ -216,7 +219,12 @@ static void encode_copy(struct xdr_stream *xdr, encode_uint32(xdr, 1); /* consecutive = true */ encode_uint32(xdr, args->sync); - encode_uint32(xdr, 0); /* src server list */ + if (args->cp_src == NULL) { /* intra-ssc */ + encode_uint32(xdr, 0); /* no src server list */ + return; + } + encode_uint32(xdr, 1); /* supporting 1 server */ + encode_nl4_server(xdr, args->cp_src); } static void encode_offload_cancel(struct xdr_stream *xdr, diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 686a6c4071e3..b68b41be6d9f 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -134,6 +134,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, size_t count, unsigned int flags) { struct nfs42_copy_notify_res *cn_resp = NULL; + struct nl4_server *nss = NULL; + nfs4_stateid *cnrs = NULL; ssize_t ret; /* Only offload copy if superblock is the same */ @@ -154,8 +156,11 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = -EOPNOTSUPP; goto out; } + nss = &cn_resp->cnr_src; + cnrs = &cn_resp->cnr_stateid; } - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); + ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count, + nss, cnrs); out: kfree(cn_resp); return ret; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0a7af40026d7..008facac8a30 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1435,6 +1435,7 @@ struct nfs42_copy_args { u64 count; bool sync; + struct nl4_server *cp_src; }; struct nfs42_write_res { -- cgit v1.2.3 From 0e65a32c8a569db363048e17a708b1a0913adbef Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 14 Jun 2019 14:38:40 -0400 Subject: NFS: handle source server reboot When the source server reboots after a server-to-server copy was issued, we need to retry the copy from COPY_NOTIFY. We need to detect that the source server rebooted and there is a copy waiting on a destination server and wake it up. Signed-off-by: Olga Kornievskaia --- fs/nfs/nfs42proc.c | 68 ++++++++++++++++++++++++++++++++++---------------- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4file.c | 3 +++ fs/nfs/nfs4state.c | 26 +++++++++++++++---- include/linux/nfs_fs.h | 4 ++- 5 files changed, 75 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 5d833f5748e9..9c7feacb0358 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -153,22 +153,26 @@ out_unlock: } static int handle_async_copy(struct nfs42_copy_res *res, - struct nfs_server *server, + struct nfs_server *dst_server, + struct nfs_server *src_server, struct file *src, struct file *dst, - nfs4_stateid *src_stateid) + nfs4_stateid *src_stateid, + bool *restart) { struct nfs4_copy_state *copy, *tmp_copy; int status = NFS4_OK; bool found_pending = false; - struct nfs_open_context *ctx = nfs_file_open_context(dst); + struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); + struct nfs_open_context *src_ctx = nfs_file_open_context(src); copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); if (!copy) return -ENOMEM; - spin_lock(&server->nfs_client->cl_lock); - list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids, + spin_lock(&dst_server->nfs_client->cl_lock); + list_for_each_entry(tmp_copy, + &dst_server->nfs_client->pending_cb_stateids, copies) { if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, NFS4_STATEID_SIZE)) @@ -178,7 +182,7 @@ static int handle_async_copy(struct nfs42_copy_res *res, break; } if (found_pending) { - spin_unlock(&server->nfs_client->cl_lock); + spin_unlock(&dst_server->nfs_client->cl_lock); kfree(copy); copy = tmp_copy; goto out; @@ -186,19 +190,32 @@ static int handle_async_copy(struct nfs42_copy_res *res, memcpy(©->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE); init_completion(©->completion); - copy->parent_state = ctx->state; + copy->parent_dst_state = dst_ctx->state; + copy->parent_src_state = src_ctx->state; + + list_add_tail(©->copies, &dst_server->ss_copies); + spin_unlock(&dst_server->nfs_client->cl_lock); - list_add_tail(©->copies, &server->ss_copies); - spin_unlock(&server->nfs_client->cl_lock); + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_add_tail(©->src_copies, &src_server->ss_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } status = wait_for_completion_interruptible(©->completion); - spin_lock(&server->nfs_client->cl_lock); + spin_lock(&dst_server->nfs_client->cl_lock); list_del_init(©->copies); - spin_unlock(&server->nfs_client->cl_lock); + spin_unlock(&dst_server->nfs_client->cl_lock); + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_del_init(©->src_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } if (status == -ERESTARTSYS) { goto out_cancel; - } else if (copy->flags) { + } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) { status = -EAGAIN; + *restart = true; goto out_cancel; } out: @@ -247,7 +264,8 @@ static ssize_t _nfs42_proc_copy(struct file *src, struct nfs42_copy_args *args, struct nfs42_copy_res *res, struct nl4_server *nss, - nfs4_stateid *cnr_stateid) + nfs4_stateid *cnr_stateid, + bool *restart) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], @@ -255,7 +273,9 @@ static ssize_t _nfs42_proc_copy(struct file *src, .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); - struct nfs_server *server = NFS_SERVER(dst_inode); + struct inode *src_inode = file_inode(src); + struct nfs_server *dst_server = NFS_SERVER(dst_inode); + struct nfs_server *src_server = NFS_SERVER(src_inode); loff_t pos_src = args->src_pos; loff_t pos_dst = args->dst_pos; size_t count = args->count; @@ -291,13 +311,15 @@ static ssize_t _nfs42_proc_copy(struct file *src, if (!res->commit_res.verf) return -ENOMEM; } + set_bit(NFS_CLNT_SRC_SSC_COPY_STATE, + &src_lock->open_context->state->flags); set_bit(NFS_CLNT_DST_SSC_COPY_STATE, &dst_lock->open_context->state->flags); - status = nfs4_call_sync(server->client, server, &msg, + status = nfs4_call_sync(dst_server->client, dst_server, &msg, &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) - server->caps &= ~NFS_CAP_COPY; + dst_server->caps &= ~NFS_CAP_COPY; if (status) goto out; @@ -309,8 +331,8 @@ static ssize_t _nfs42_proc_copy(struct file *src, } if (!res->synchronous) { - status = handle_async_copy(res, server, src, dst, - &args->src_stateid); + status = handle_async_copy(res, dst_server, src_server, src, + dst, &args->src_stateid, restart); if (status) return status; } @@ -358,6 +380,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, .stateid = &args.dst_stateid, }; ssize_t err, err2; + bool restart = false; src_lock = nfs_get_lock_context(nfs_file_open_context(src)); if (IS_ERR(src_lock)) @@ -378,7 +401,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, err = _nfs42_proc_copy(src, src_lock, dst, dst_lock, &args, &res, - nss, cnr_stateid); + nss, cnr_stateid, &restart); inode_unlock(file_inode(dst)); if (err >= 0) @@ -388,8 +411,11 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, err = -EOPNOTSUPP; break; } else if (err == -EAGAIN) { - dst_exception.retry = 1; - continue; + if (!restart) { + dst_exception.retry = 1; + continue; + } + break; } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) { args.sync = true; dst_exception.retry = 1; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 814674f073a1..2122748f6f7c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -168,6 +168,7 @@ enum { NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ #ifdef CONFIG_NFS_V4_2 NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/ + NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/ NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */ #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index a932fc9ca9c4..2af30b7f5bfd 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -146,6 +146,7 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; +retry: if (!nfs42_files_from_same_server(file_in, file_out)) { cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), GFP_NOFS); @@ -164,6 +165,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, nss, cnrs); out: kfree(cn_resp); + if (ret == -EAGAIN) + goto retry; return ret; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c45b3007e2af..e799fbe9ac58 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1556,16 +1556,32 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state { struct nfs4_copy_state *copy; - if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) + if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) && + !test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags)) return; spin_lock(&sp->so_server->nfs_client->cl_lock); list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { - if (!nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid)) - continue; + if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) && + !nfs4_stateid_match_other(&state->stateid, + ©->parent_dst_state->stateid))) + continue; copy->flags = 1; - complete(©->completion); - break; + if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE, + &state->flags)) { + clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags); + complete(©->completion); + } + } + list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) { + if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) && + !nfs4_stateid_match_other(&state->stateid, + ©->parent_src_state->stateid))) + continue; + copy->flags = 1; + if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE, + &state->flags)) + complete(©->completion); } spin_unlock(&sp->so_server->nfs_client->cl_lock); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 570a60c2f4f4..c06b1fd130f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -189,13 +189,15 @@ struct nfs_inode { struct nfs4_copy_state { struct list_head copies; + struct list_head src_copies; nfs4_stateid stateid; struct completion completion; uint64_t count; struct nfs_writeverf verf; int error; int flags; - struct nfs4_state *parent_state; + struct nfs4_state *parent_src_state; + struct nfs4_state *parent_dst_state; }; /* -- cgit v1.2.3 From bf7ca707ae60045342e145c88a83bbe00f66775f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 12:58:14 -0400 Subject: SUNRPC: Add trace points to observe transport congestion control To help debug problems with RPC/RDMA credit management, replace dprintk() call sites in the transport send lock paths with trace events. Similar trace points are defined for the non-congestion paths. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 93 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 22 +++++----- 2 files changed, 106 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffa3c51dbb1a..378233fe5ac7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -777,6 +777,99 @@ TRACE_EVENT(xprt_ping, __get_str(addr), __get_str(port), __entry->status) ); +DECLARE_EVENT_CLASS(xprt_writelock_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + ), + + TP_printk("task:%u@%u snd_task:%u", + __entry->task_id, __entry->client_id, + __entry->snd_task_id) +); + +#define DEFINE_WRITELOCK_EVENT(name) \ + DEFINE_EVENT(xprt_writelock_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_WRITELOCK_EVENT(reserve_xprt); +DEFINE_WRITELOCK_EVENT(release_xprt); + +DECLARE_EVENT_CLASS(xprt_cong_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + __field(unsigned long, cong) + __field(unsigned long, cwnd) + __field(bool, wait) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + __entry->cong = xprt->cong; + __entry->cwnd = xprt->cwnd; + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); + ), + + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", + __entry->task_id, __entry->client_id, + __entry->snd_task_id, __entry->cong, __entry->cwnd, + __entry->wait ? " (wait)" : "") +); + +#define DEFINE_CONG_EVENT(name) \ + DEFINE_EVENT(xprt_cong_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_CONG_EVENT(reserve_cong); +DEFINE_CONG_EVENT(release_cong); +DEFINE_CONG_EVENT(get_cong); +DEFINE_CONG_EVENT(put_cong); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8a45b3ccc313..fcbeb56c82cd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -205,20 +205,20 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) - return 1; + goto out_locked; goto out_sleep; } if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) goto out_unlock; xprt->snd_task = task; +out_locked: + trace_xprt_reserve_xprt(xprt, task); return 1; out_unlock: xprt_clear_locked(xprt); out_sleep: - dprintk("RPC: %5u failed to lock transport %p\n", - task->tk_pid, xprt); task->tk_status = -EAGAIN; if (RPC_IS_SOFT(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, @@ -269,23 +269,22 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) - return 1; + goto out_locked; goto out_sleep; } if (req == NULL) { xprt->snd_task = task; - return 1; + goto out_locked; } if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) goto out_unlock; if (!xprt_need_congestion_window_wait(xprt)) { xprt->snd_task = task; - return 1; + goto out_locked; } out_unlock: xprt_clear_locked(xprt); out_sleep: - dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_status = -EAGAIN; if (RPC_IS_SOFT(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, @@ -293,6 +292,9 @@ out_sleep: else rpc_sleep_on(&xprt->sending, task, NULL); return 0; +out_locked: + trace_xprt_reserve_cong(xprt, task); + return 1; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -357,6 +359,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } + trace_xprt_release_xprt(xprt, task); } EXPORT_SYMBOL_GPL(xprt_release_xprt); @@ -374,6 +377,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } + trace_xprt_release_cong(xprt, task); } EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); @@ -395,8 +399,7 @@ __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) { if (req->rq_cong) return 1; - dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", - req->rq_task->tk_pid, xprt->cong, xprt->cwnd); + trace_xprt_get_cong(xprt, req->rq_task); if (RPCXPRT_CONGESTED(xprt)) { xprt_set_congestion_window_wait(xprt); return 0; @@ -418,6 +421,7 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) req->rq_cong = 0; xprt->cong -= RPC_CWNDSCALE; xprt_test_and_clear_congestion_window_wait(xprt); + trace_xprt_put_cong(xprt, req->rq_task); __xprt_lock_write_next_cong(xprt); } -- cgit v1.2.3 From 4b93dab36f28e673725e5e6123ebfccf7697f96a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 13:07:21 -0400 Subject: xprtrdma: Add unique trace points for posting Local Invalidate WRs When adding frwr_unmap_async way back when, I re-used the existing trace_xprtrdma_post_send() trace point to record the return code of ib_post_send. Unfortunately there are some cases where re-using that trace point causes a crash. Instead, construct a trace point specific to posting Local Invalidate WRs that will always be safe to use in that context, and will act as a trace log eye-catcher for Local Invalidation. Fixes: 847568942f93 ("xprtrdma: Remove fr_state") Fixes: d8099feda483 ("xprtrdma: Reduce context switching due ... ") Signed-off-by: Chuck Lever Tested-by: Bill Baker Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 25 +++++++++++++++++++++++++ net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a13830616107..7fd11ec1c9a4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -735,6 +735,31 @@ TRACE_EVENT(xprtrdma_post_recvs, ) ); +TRACE_EVENT(xprtrdma_post_linv, + TP_PROTO( + const struct rpcrdma_req *req, + int status + ), + + TP_ARGS(req, status), + + TP_STRUCT__entry( + __field(const void *, req) + __field(int, status) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->req = req; + __entry->status = status; + __entry->xid = be32_to_cpu(req->rl_slot.rq_xid); + ), + + TP_printk("req=%p xid=0x%08x status=%d", + __entry->req, __entry->xid, __entry->status + ) +); + /** ** Completion events **/ diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 30065a28628c..9901a811f598 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -570,7 +570,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ bad_wr = NULL; rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); - trace_xprtrdma_post_send(req, rc); /* The final LOCAL_INV WR in the chain is supposed to * do the wake. If it was never posted, the wake will @@ -583,6 +582,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Recycle MRs in the LOCAL_INV chain that did not get posted. */ + trace_xprtrdma_post_linv(req, rc); while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); @@ -673,12 +673,12 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ bad_wr = NULL; rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); - trace_xprtrdma_post_send(req, rc); if (!rc) return; /* Recycle MRs in the LOCAL_INV chain that did not get posted. */ + trace_xprtrdma_post_linv(req, rc); while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); mr = container_of(frwr, struct rpcrdma_mr, frwr); -- cgit v1.2.3 From dc15c3d5f16808f7c171b55da6a82a5c0f279647 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:35 -0400 Subject: xprtrdma: Move the rpcrdma_sendctx::sc_wr field Clean up: This field is not needed in the Send completion handler, so it can be moved to struct rpcrdma_req to reduce the size of struct rpcrdma_sendctx, and to reduce the amount of memory that is sloshed between the sending process and the Send completion process. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 5 ++--- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 9 ++++++--- net/sunrpc/xprtrdma/verbs.c | 5 +---- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 7fd11ec1c9a4..f8edab91e09c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -667,9 +667,8 @@ TRACE_EVENT(xprtrdma_post_send, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; - __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; - __entry->signaled = req->rl_sendctx->sc_wr.send_flags & - IB_SEND_SIGNALED; + __entry->num_sge = req->rl_wr.num_sge; + __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; __entry->status = status; ), diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5cd871568c67..523722be6a16 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -396,7 +396,7 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; - post_wr = &req->rl_sendctx->sc_wr; + post_wr = &req->rl_wr; list_for_each_entry(mr, &req->rl_registered, mr_list) { struct rpcrdma_frwr *frwr; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 1941b2261ca5..53cd2e3cf003 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -599,7 +599,7 @@ static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); - sc->sc_wr.num_sge++; + req->rl_wr.num_sge++; return true; out_regbuf: @@ -711,7 +711,7 @@ map_tail: } out: - sc->sc_wr.num_sge += sge_no; + req->rl_wr.num_sge += sge_no; if (sc->sc_unmap_count) kref_get(&req->rl_kref); return true; @@ -752,10 +752,13 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); if (!req->rl_sendctx) goto err; - req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_req = req; kref_init(&req->rl_kref); + req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe; + req->rl_wr.sg_list = req->rl_sendctx->sc_sges; + req->rl_wr.num_sge = 0; + req->rl_wr.opcode = IB_WR_SEND; ret = -EIO; if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3ab086aa69bb..2f4658255343 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -811,9 +811,6 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) if (!sc) return NULL; - sc->sc_wr.wr_cqe = &sc->sc_cqe; - sc->sc_wr.sg_list = sc->sc_sges; - sc->sc_wr.opcode = IB_WR_SEND; sc->sc_cqe.done = rpcrdma_wc_send; return sc; } @@ -1469,7 +1466,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_req *req) { - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; + struct ib_send_wr *send_wr = &req->rl_wr; int rc; if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0e5b7f373077..cdd6a3d43e0f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -219,7 +219,6 @@ enum { */ struct rpcrdma_req; struct rpcrdma_sendctx { - struct ib_send_wr sc_wr; struct ib_cqe sc_cqe; struct rpcrdma_req *sc_req; unsigned int sc_unmap_count; @@ -314,6 +313,7 @@ struct rpcrdma_req { struct rpcrdma_rep *rl_reply; struct xdr_stream rl_stream; struct xdr_buf rl_hdrbuf; + struct ib_send_wr rl_wr; struct rpcrdma_sendctx *rl_sendctx; struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ -- cgit v1.2.3 From 614f3c96d7e5efd1c4dc699524857130a52c6a7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:53 -0400 Subject: xprtrdma: Pull up sometimes On some platforms, DMA mapping part of a page is more costly than copying bytes. Restore the pull-up code and use that when we think it's going to be faster. The heuristic for now is to pull-up when the size of the RPC message body fits in the buffer underlying the head iovec. Indeed, not involving the I/O MMU can help the RPC/RDMA transport scale better for tiny I/Os across more RDMA devices. This is because interaction with the I/O MMU is eliminated, as is handling a Send completion, for each of these small I/Os. Without the explicit unmapping, the NIC no longer needs to do a costly internal TLB shoot down for buffers that are just a handful of bytes. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 4 ++ net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 82 ++++++++++++++++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 2 + 5 files changed, 85 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f8edab91e09c..213c72585a5f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -532,6 +532,8 @@ DEFINE_WRCH_EVENT(write); DEFINE_WRCH_EVENT(reply); TRACE_DEFINE_ENUM(rpcrdma_noch); +TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); +TRACE_DEFINE_ENUM(rpcrdma_noch_mapped); TRACE_DEFINE_ENUM(rpcrdma_readch); TRACE_DEFINE_ENUM(rpcrdma_areadch); TRACE_DEFINE_ENUM(rpcrdma_writech); @@ -540,6 +542,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych); #define xprtrdma_show_chunktype(x) \ __print_symbolic(x, \ { rpcrdma_noch, "inline" }, \ + { rpcrdma_noch_pullup, "pullup" }, \ + { rpcrdma_noch_mapped, "mapped" }, \ { rpcrdma_readch, "read list" }, \ { rpcrdma_areadch, "*read list" }, \ { rpcrdma_writech, "write list" }, \ diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 50e075fcdd8f..11685245546a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -79,7 +79,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) *p = xdr_zero; if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, - &rqst->rq_snd_buf, rpcrdma_noch)) + &rqst->rq_snd_buf, rpcrdma_noch_pullup)) return -EIO; trace_xprtrdma_cb_reply(rqst); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a441dbf9f198..4ad88893e964 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -392,7 +392,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, unsigned int pos; int nsegs; - if (rtype == rpcrdma_noch) + if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped) goto done; pos = rqst->rq_snd_buf.head[0].iov_len; @@ -691,6 +691,72 @@ out_mapping_err: return false; } +/* Copy the tail to the end of the head buffer. + */ +static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + unsigned char *dst; + + dst = (unsigned char *)xdr->head[0].iov_base; + dst += xdr->head[0].iov_len + xdr->page_len; + memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len); + r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len; +} + +/* Copy pagelist content into the head buffer. + */ +static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + unsigned int len, page_base, remaining; + struct page **ppages; + unsigned char *src, *dst; + + dst = (unsigned char *)xdr->head[0].iov_base; + dst += xdr->head[0].iov_len; + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_base = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining) { + src = page_address(*ppages); + src += page_base; + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); + memcpy(dst, src, len); + r_xprt->rx_stats.pullup_copy_count += len; + + ppages++; + dst += len; + remaining -= len; + page_base = 0; + } +} + +/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it. + * When the head, pagelist, and tail are small, a pull-up copy + * is considerably less costly than DMA mapping the components + * of @xdr. + * + * Assumptions: + * - the caller has already verified that the total length + * of the RPC Call body will fit into @rl_sendbuf. + */ +static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + if (unlikely(xdr->tail[0].iov_len)) + rpcrdma_pullup_tail_iov(r_xprt, req, xdr); + + if (unlikely(xdr->page_len)) + rpcrdma_pullup_pagelist(r_xprt, req, xdr); + + /* The whole RPC message resides in the head iovec now */ + return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len); +} + static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct xdr_buf *xdr) @@ -779,7 +845,11 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, goto out_unmap; switch (rtype) { - case rpcrdma_noch: + case rpcrdma_noch_pullup: + if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) + goto out_unmap; + break; + case rpcrdma_noch_mapped: if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr)) goto out_unmap; break; @@ -827,6 +897,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct xdr_stream *xdr = &req->rl_stream; enum rpcrdma_chunktype rtype, wtype; + struct xdr_buf *buf = &rqst->rq_snd_buf; bool ddp_allowed; __be32 *p; int ret; @@ -884,8 +955,9 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) */ if (rpcrdma_args_inline(r_xprt, rqst)) { *p++ = rdma_msg; - rtype = rpcrdma_noch; - } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { + rtype = buf->len < rdmab_length(req->rl_sendbuf) ? + rpcrdma_noch_pullup : rpcrdma_noch_mapped; + } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) { *p++ = rdma_msg; rtype = rpcrdma_readch; } else { @@ -927,7 +999,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) goto out_err; ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len, - &rqst->rq_snd_buf, rtype); + buf, rtype); if (ret) goto out_err; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2f4658255343..a514e2c89ac3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1165,7 +1165,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE, + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, GFP_KERNEL); if (!req) goto out; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cdd6a3d43e0f..5d15140a0266 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -554,6 +554,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); enum rpcrdma_chunktype { rpcrdma_noch = 0, + rpcrdma_noch_pullup, + rpcrdma_noch_mapped, rpcrdma_readch, rpcrdma_areadch, rpcrdma_writech, -- cgit v1.2.3 From 7b020f17bbd34c219419b634d9efb9e93a3af4c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:01:58 -0400 Subject: xprtrdma: Report the computed connect delay For debugging, the op_connect trace point should report the computed connect delay. We can then ensure that the delay is computed at the proper times, for example. As a further clean-up, remove a few low-value "heartbeat" trace points in the connect path. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 77 ++++++++++++++++++++++++++++++----------- net/sunrpc/xprtrdma/transport.c | 3 +- net/sunrpc/xprtrdma/verbs.c | 13 ++----- 3 files changed, 60 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 213c72585a5f..99e3c61609b2 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -85,6 +85,44 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, ), \ TP_ARGS(r_xprt)) +DECLARE_EVENT_CLASS(xprtrdma_connect_class, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + int rc + ), + + TP_ARGS(r_xprt, rc), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(int, rc) + __field(int, connect_status) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->rc = rc; + __entry->connect_status = r_xprt->rx_ep.rep_connected; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->rc, __entry->connect_status + ) +); + +#define DEFINE_CONN_EVENT(name) \ + DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \ + TP_PROTO( \ + const struct rpcrdma_xprt *r_xprt, \ + int rc \ + ), \ + TP_ARGS(r_xprt, rc)) + DECLARE_EVENT_CLASS(xprtrdma_rdch_event, TP_PROTO( const struct rpc_task *task, @@ -333,47 +371,44 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); -TRACE_EVENT(xprtrdma_disconnect, +DEFINE_CONN_EVENT(connect); +DEFINE_CONN_EVENT(disconnect); + +DEFINE_RXPRT_EVENT(xprtrdma_create); +DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); +DEFINE_RXPRT_EVENT(xprtrdma_remove); +DEFINE_RXPRT_EVENT(xprtrdma_reinsert); +DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); +DEFINE_RXPRT_EVENT(xprtrdma_op_close); + +TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( const struct rpcrdma_xprt *r_xprt, - int status + unsigned long delay ), - TP_ARGS(r_xprt, status), + TP_ARGS(r_xprt, delay), TP_STRUCT__entry( __field(const void *, r_xprt) - __field(int, status) - __field(int, connected) + __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( __entry->r_xprt = r_xprt; - __entry->status = status; - __entry->connected = r_xprt->rx_ep.rep_connected; + __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->status, - __entry->connected == 1 ? "still " : "dis" + TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->delay ) ); -DEFINE_RXPRT_EVENT(xprtrdma_conn_start); -DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); -DEFINE_RXPRT_EVENT(xprtrdma_create); -DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); -DEFINE_RXPRT_EVENT(xprtrdma_remove); -DEFINE_RXPRT_EVENT(xprtrdma_reinsert); -DEFINE_RXPRT_EVENT(xprtrdma_reconnect); -DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_close); -DEFINE_RXPRT_EVENT(xprtrdma_op_connect); TRACE_EVENT(xprtrdma_op_set_cto, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 361e59146807..ce263e6f779c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -527,13 +527,12 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); unsigned long delay; - trace_xprtrdma_op_connect(r_xprt); - delay = 0; if (r_xprt->rx_ep.rep_connected != 0) { delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } + trace_xprtrdma_op_connect(r_xprt, delay); queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, delay); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a514e2c89ac3..92bdf053716a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -297,8 +297,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) struct rdma_cm_id *id; int rc; - trace_xprtrdma_conn_start(xprt); - init_completion(&ia->ri_done); init_completion(&ia->ri_remove_done); @@ -314,10 +312,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) if (rc) goto out; rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); - if (rc < 0) { - trace_xprtrdma_conn_tout(xprt); + if (rc < 0) goto out; - } rc = ia->ri_async_rc; if (rc) @@ -328,10 +324,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) if (rc) goto out; rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); - if (rc < 0) { - trace_xprtrdma_conn_tout(xprt); + if (rc < 0) goto out; - } rc = ia->ri_async_rc; if (rc) goto out; @@ -644,8 +638,6 @@ static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id, *old; int err, rc; - trace_xprtrdma_reconnect(r_xprt); - rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); rc = -EHOSTUNREACH; @@ -744,6 +736,7 @@ out: ep->rep_connected = rc; out_noupdate: + trace_xprtrdma_connect(r_xprt, rc); return rc; } -- cgit v1.2.3 From d4957f01d29b2a01200117fc04b9faaa52aca4bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:03 -0400 Subject: xprtrdma: Refine trace_xprtrdma_fixup Slightly reduce overhead and display more useful information. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 60 +++++++++--------------------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 5 ++-- 2 files changed, 15 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 99e3c61609b2..542177c5896f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1084,66 +1084,32 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); TRACE_EVENT(xprtrdma_fixup, TP_PROTO( const struct rpc_rqst *rqst, - int len, - int hdrlen + unsigned long fixup ), - TP_ARGS(rqst, len, hdrlen), + TP_ARGS(rqst, fixup), TP_STRUCT__entry( __field(unsigned int, task_id) __field(unsigned int, client_id) - __field(const void *, base) - __field(int, len) - __field(int, hdrlen) - ), - - TP_fast_assign( - __entry->task_id = rqst->rq_task->tk_pid; - __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->base = rqst->rq_rcv_buf.head[0].iov_base; - __entry->len = len; - __entry->hdrlen = hdrlen; - ), - - TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", - __entry->task_id, __entry->client_id, - __entry->base, __entry->len, __entry->hdrlen - ) -); - -TRACE_EVENT(xprtrdma_fixup_pg, - TP_PROTO( - const struct rpc_rqst *rqst, - int pageno, - const void *pos, - int len, - int curlen - ), - - TP_ARGS(rqst, pageno, pos, len, curlen), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, pos) - __field(int, pageno) - __field(int, len) - __field(int, curlen) + __field(unsigned long, fixup) + __field(size_t, headlen) + __field(unsigned int, pagelen) + __field(size_t, taillen) ), TP_fast_assign( __entry->task_id = rqst->rq_task->tk_pid; __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->pos = pos; - __entry->pageno = pageno; - __entry->len = len; - __entry->curlen = curlen; + __entry->fixup = fixup; + __entry->headlen = rqst->rq_rcv_buf.head[0].iov_len; + __entry->pagelen = rqst->rq_rcv_buf.page_len; + __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", - __entry->task_id, __entry->client_id, - __entry->pageno, __entry->pos, __entry->len, __entry->curlen + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", + __entry->task_id, __entry->client_id, __entry->fixup, + __entry->headlen, __entry->pagelen, __entry->taillen ) ); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 4ad88893e964..26d334c83b38 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1086,7 +1086,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) curlen = copy_len; - trace_xprtrdma_fixup(rqst, copy_len, curlen); srcp += curlen; copy_len -= curlen; @@ -1106,8 +1105,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > pagelist_len) curlen = pagelist_len; - trace_xprtrdma_fixup_pg(rqst, i, srcp, - copy_len, curlen); destp = kmap_atomic(ppages[i]); memcpy(destp + page_base, srcp, curlen); flush_dcache_page(ppages[i]); @@ -1139,6 +1136,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) rqst->rq_private_buf.tail[0].iov_base = srcp; } + if (fixup_copy_count) + trace_xprtrdma_fixup(rqst, fixup_copy_count); return fixup_copy_count; } -- cgit v1.2.3 From f54c870d326aa02b73b68d2e0a503ec81dd3a4e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:09 -0400 Subject: xprtrdma: Replace dprintk() in rpcrdma_update_connect_private() Clean up: Use a single trace point to record each connection's negotiated inline thresholds and the computed maximum byte size of transport headers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---- net/sunrpc/xprtrdma/verbs.c | 21 ++++++++++----------- 3 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 542177c5896f..0ca118bcc5ce 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -371,6 +371,42 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); +TRACE_EVENT(xprtrdma_inline_thresh, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt + ), + + TP_ARGS(r_xprt), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, inline_send) + __field(unsigned int, inline_recv) + __field(unsigned int, max_send) + __field(unsigned int, max_recv) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + const struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + __entry->r_xprt = r_xprt; + __entry->inline_send = ep->rep_inline_send; + __entry->inline_recv = ep->rep_inline_recv; + __entry->max_send = ep->rep_max_inline_send; + __entry->max_recv = ep->rep_max_inline_recv; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->inline_send, __entry->inline_recv, + __entry->max_send, __entry->max_recv + ) +); + DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 26d334c83b38..aec3beb93b25 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -78,8 +78,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) size += rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ - dprintk("RPC: %s: max call header size = %u\n", - __func__, size); return size; } @@ -100,8 +98,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ - dprintk("RPC: %s: max reply header size = %u\n", - __func__, size); return size; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 92bdf053716a..77c7dd7f05e8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -177,11 +177,11 @@ out_flushed: rpcrdma_recv_buffer_put(rep); } -static void -rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, - struct rdma_conn_param *param) +static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, + struct rdma_conn_param *param) { const struct rpcrdma_connect_private *pmsg = param->private_data; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ @@ -197,13 +197,11 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } - if (rsize < r_xprt->rx_ep.rep_inline_recv) - r_xprt->rx_ep.rep_inline_recv = rsize; - if (wsize < r_xprt->rx_ep.rep_inline_send) - r_xprt->rx_ep.rep_inline_send = wsize; - dprintk("RPC: %s: max send %u, max recv %u\n", __func__, - r_xprt->rx_ep.rep_inline_send, - r_xprt->rx_ep.rep_inline_recv); + if (rsize < ep->rep_inline_recv) + ep->rep_inline_recv = rsize; + if (wsize < ep->rep_inline_send) + ep->rep_inline_send = wsize; + rpcrdma_set_max_header_sizes(r_xprt); } @@ -257,7 +255,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_ESTABLISHED: ++xprt->connect_cookie; ep->rep_connected = 1; - rpcrdma_update_connect_private(r_xprt, &event->param.conn); + rpcrdma_update_cm_private(r_xprt, &event->param.conn); + trace_xprtrdma_inline_thresh(r_xprt); wake_up_all(&ep->rep_connect_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: -- cgit v1.2.3 From a52c23b8b207d676d6cdf531af482a79fa622b9d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:14 -0400 Subject: xprtrdma: Replace dprintk in xprt_rdma_set_port Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 + net/sunrpc/xprtrdma/transport.c | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 0ca118bcc5ce..69a8278e5cef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -416,6 +416,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_remove); DEFINE_RXPRT_EVENT(xprtrdma_reinsert); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); +DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ce263e6f779c..7395eb2cfdeb 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -441,12 +441,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) struct sockaddr *sap = (struct sockaddr *)&xprt->addr; char buf[8]; - dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", - __func__, xprt, - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - port); - rpc_set_port(sap, port); kfree(xprt->address_strings[RPC_DISPLAY_PORT]); @@ -456,6 +450,9 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); + + trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, + rx_xprt)); } /** -- cgit v1.2.3 From e86d5a02874c1364c50e1b532481835b54173ed2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 16:38:56 -0400 Subject: NFS: Convert struct nfs_fattr to use struct timespec64 NFSv4 supports 64-bit times, so we should switch to using struct timespec64 when decoding attributes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 54 +++++++++++++++++++++++------------------------ fs/nfs/internal.h | 2 +- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4xdr.c | 14 ++++++------ include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 12 +++++------ 7 files changed, 44 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2a03bfeec10a..b0b4b9f303fd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -504,15 +504,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (nfs_server_capable(inode, NFS_CAP_ATIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -698,7 +698,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -709,14 +709,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (attr->ia_valid & ATTR_ATIME_SET) inode->i_atime = attr->ia_atime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -725,14 +725,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; else if (attr->ia_valid & ATTR_MTIME_SET) inode->i_mtime = attr->ia_mtime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -1351,7 +1351,7 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct timespec ts; + struct timespec64 ts; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -1361,18 +1361,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } /* If we have atomic WCC data, we may update some attributes */ - ts = timespec64_to_timespec(inode->i_ctime); + ts = inode->i_ctime; if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&ts, &fattr->pre_ctime)) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + && timespec64_equal(&ts, &fattr->pre_ctime)) { + inode->i_ctime = fattr->ctime; } - ts = timespec64_to_timespec(inode->i_mtime); + ts = inode->i_mtime; if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) - && timespec_equal(&ts, &fattr->pre_mtime)) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + && timespec64_equal(&ts, &fattr->pre_mtime)) { + inode->i_mtime = fattr->mtime; if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } @@ -1398,7 +1398,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; unsigned long invalid = 0; - struct timespec ts; + struct timespec64 ts; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; @@ -1425,12 +1425,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_CHANGE | NFS_INO_REVAL_PAGECACHE; - ts = timespec64_to_timespec(inode->i_mtime); - if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime)) + ts = inode->i_mtime; + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - ts = timespec64_to_timespec(inode->i_ctime); - if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime)) + ts = inode->i_ctime; + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1460,8 +1460,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_OTHER; - ts = timespec64_to_timespec(inode->i_atime); - if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime)) + ts = inode->i_atime; + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) @@ -1733,12 +1733,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime); + fattr->pre_ctime = inode->i_ctime; fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime); + fattr->pre_mtime = inode->i_mtime; fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -1899,7 +1899,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode->i_mtime = fattr->mtime; } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_MTIME @@ -1908,7 +1908,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode->i_ctime = fattr->ctime; } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_CTIME @@ -1946,7 +1946,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode->i_atime = fattr->atime; else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 447a3c17fa8e..24a65da58aa9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -713,7 +713,7 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) * 1024*1024*1024. */ static inline -u64 nfs_timespec_to_change_attr(const struct timespec *ts) +u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cbc17a203248..d4e144712034 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -234,7 +234,7 @@ static __be32 *xdr_encode_current_server_time(__be32 *p, return p; } -static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep) +static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep) { timep->tv_sec = be32_to_cpup(p++); timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 602767850b36..2a16bbda3937 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -463,7 +463,7 @@ static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep) return p; } -static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) +static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep) { timep->tv_sec = be32_to_cpup(p++); timep->tv_nsec = be32_to_cpup(p++); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ab07db0f07cd..2af962810ed8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4065,17 +4065,17 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint } static __be32 * -xdr_decode_nfstime4(__be32 *p, struct timespec *t) +xdr_decode_nfstime4(__be32 *p, struct timespec64 *t) { __u64 sec; p = xdr_decode_hyper(p, &sec); - t-> tv_sec = (time_t)sec; + t-> tv_sec = sec; t->tv_nsec = be32_to_cpup(p++); return p; } -static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) +static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time) { __be32 *p; @@ -4086,7 +4086,7 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) return 0; } -static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) { int status = 0; @@ -4104,7 +4104,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } -static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) { int status = 0; @@ -4123,7 +4123,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s } static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, - struct timespec *time) + struct timespec64 *time) { int status = 0; @@ -4186,7 +4186,7 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, return status; } -static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) +static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) { int status = 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..47266870a235 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -171,7 +171,7 @@ struct nfs_server { struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ - struct timespec time_delta; /* smallest time granularity */ + struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..db5c01001937 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -62,14 +62,14 @@ struct nfs_fattr { struct nfs_fsid fsid; __u64 fileid; __u64 mounted_on_fileid; - struct timespec atime; - struct timespec mtime; - struct timespec ctime; + struct timespec64 atime; + struct timespec64 mtime; + struct timespec64 ctime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ + struct timespec64 pre_mtime; /* pre_op_attr.mtime */ + struct timespec64 pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; struct nfs4_string *owner_name; @@ -143,7 +143,7 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - struct timespec time_delta; /* server time granularity */ + struct timespec64 time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ __u32 nlayouttypes; /* number of layouttypes */ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ -- cgit v1.2.3 From 6430b323ae09f146dfc26e6d17c432bfc3d11452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 17:00:02 -0400 Subject: NFSv3: Clean up timespec encode Simplify the struct iattr timestamp encoding by skipping the step of an intermediate struct timespec. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 12 ++++-------- include/linux/nfs_xdr.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 2a16bbda3937..927eb680f161 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -456,9 +456,9 @@ static void zero_nfs_fh3(struct nfs_fh *fh) * uint32 nseconds; * }; */ -static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep) +static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep) { - *p++ = cpu_to_be32(timep->tv_sec); + *p++ = cpu_to_be32((u32)timep->tv_sec); *p++ = cpu_to_be32(timep->tv_nsec); return p; } @@ -533,7 +533,6 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep) static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, struct user_namespace *userns) { - struct timespec ts; u32 nbytes; __be32 *p; @@ -583,10 +582,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, *p++ = xdr_zero; if (attr->ia_valid & ATTR_ATIME_SET) { - struct timespec ts; *p++ = xdr_two; - ts = timespec64_to_timespec(attr->ia_atime); - p = xdr_encode_nfstime3(p, &ts); + p = xdr_encode_nfstime3(p, &attr->ia_atime); } else if (attr->ia_valid & ATTR_ATIME) { *p++ = xdr_one; } else @@ -594,8 +591,7 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, if (attr->ia_valid & ATTR_MTIME_SET) { *p++ = xdr_two; - ts = timespec64_to_timespec(attr->ia_mtime); - xdr_encode_nfstime3(p, &ts); + xdr_encode_nfstime3(p, &attr->ia_mtime); } else if (attr->ia_valid & ATTR_MTIME) { *p = xdr_one; } else diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index db5c01001937..22bc6613474e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -869,7 +869,7 @@ struct nfs3_sattrargs { struct nfs_fh * fh; struct iattr * sattr; unsigned int guard; - struct timespec guardtime; + struct timespec64 guardtime; }; struct nfs3_diropargs { -- cgit v1.2.3 From 4b1b69cedf9de8c203101ea74510c07d428538f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Oct 2019 14:08:43 -0400 Subject: NFS: Add a flag to tell nfs_client to set RPC_CLNT_CREATE_NOPING Add a flag to tell the nfs_client it should set RPC_CLNT_CREATE_NOPING when creating the rpc client. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 30838304a0bf..fa7d92328c72 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -515,6 +515,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; + if (test_bit(NFS_CS_NOPING, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NOPING; if (!IS_ERR(clp->cl_rpcclient)) return 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 47266870a235..a50dd432475b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -45,6 +45,7 @@ struct nfs_client { #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ +#define NFS_CS_NOPING 6 /* - don't ping on connect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From 52f98f1a2ddd2bb561f2c7e3b19a81d816a63118 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:49:45 -0400 Subject: NFS/pnfs: Separate NFSv3 DS and MDS traffic If a NFSv3 server is being used as both a DS and as a regular NFSv3 server, we may want to keep the IO traffic on a separate TCP connection, since it will typically have very different timeout characteristics. This patch therefore sets up a flag to separate the two modes of operation for the nfs_client. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 ++++++ fs/nfs/nfs3client.c | 1 + include/linux/nfs_fs_sb.h | 1 + 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fa7d92328c72..bd6575ee3b8e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -312,6 +312,12 @@ again: /* Match nfsv4 minorversion */ if (clp->cl_minorversion != data->minorversion) continue; + + /* Match request for a dedicated DS */ + if (test_bit(NFS_CS_DS, &data->init_flags) != + test_bit(NFS_CS_DS, &clp->cl_flags)) + continue; + /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) /* Match all xprt_switch full socket addresses */ diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 793fa4273edb..223904bc40a7 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -109,6 +109,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); __set_bit(NFS_CS_NOPING, &cl_init.init_flags); + __set_bit(NFS_CS_DS, &cl_init.init_flags); /* Use the MDS nfs_client cl_ipaddr. */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50dd432475b..69e80cef5a81 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -46,6 +46,7 @@ struct nfs_client { #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ +#define NFS_CS_DS 7 /* - Server is a DS */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From e6237b6feb37582fbd6bd7a8336d1256a6b4b4f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 11:13:54 -0400 Subject: NFSv4.1: Don't rebind to the same source port when reconnecting to the server NFSv2, v3 and NFSv4 servers often have duplicate replay caches that look at the source port when deciding whether or not an RPC call is a replay of a previous call. This requires clients to perform strange TCP gymnastics in order to ensure that when they reconnect to the server, they bind to the same source port. NFSv4.1 and NFSv4.2 have sessions that provide proper replay semantics, that do not look at the source port of the connection. This patch therefore ensures they can ignore the rebind requirement. Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 3 ++- fs/nfs/client.c | 3 +++ fs/nfs/nfs4client.c | 5 ++++- include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/clnt.c | 7 ++++++- net/sunrpc/xprtsock.c | 2 +- 8 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7d46fafdbbe5..0afb6d59bad0 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -464,7 +464,8 @@ nlm_bind_host(struct nlm_host *host) .version = host->h_version, .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_NOPING | - RPC_CLNT_CREATE_AUTOBIND), + RPC_CLNT_CREATE_AUTOBIND | + RPC_CLNT_CREATE_REUSEPORT), .cred = host->h_cred, }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index bd6575ee3b8e..02110a30a49e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -523,6 +523,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (test_bit(NFS_CS_NOPING, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NOPING; + if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_REUSEPORT; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -670,6 +672,7 @@ static int nfs_init_server(struct nfs_server *server, .timeparms = &timeparms, .cred = server->cred, .nconnect = data->nfs_server.nconnect, + .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ebc960dd89ff..abd5af77fe94 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -879,8 +879,11 @@ static int nfs4_set_client(struct nfs_server *server, }; struct nfs_client *clp; - if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP) + if (minorversion == 0) + __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); + else if (proto == XPRT_TRANSPORT_TCP) cl_init.nconnect = nconnect; + if (server->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); if (server->options & NFS_OPTION_MIGRATION) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 69e80cef5a81..df61ff8981e8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ +#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index abc63bd1be2b..ec52e78d432b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -149,6 +149,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) +#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d783e15ba898..ccd35cf4fc41 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -207,7 +207,8 @@ struct rpc_xprt { unsigned int min_reqs; /* min number of slots */ unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ - unsigned char resvport : 1; /* use a reserved port */ + unsigned char resvport : 1, /* use a reserved port */ + reuseport : 1; /* reuse port on reconnect */ atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f7f78566be46..5baf9b9be2e8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -591,6 +591,9 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) xprt->resvport = 1; if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; + xprt->reuseport = 0; + if (args->flags & RPC_CLNT_CREATE_REUSEPORT) + xprt->reuseport = 1; clnt = rpc_create_xprt(args, xprt); if (IS_ERR(clnt) || args->nconnect <= 1) @@ -2906,7 +2909,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt; unsigned long connect_timeout; unsigned long reconnect_timeout; - unsigned char resvport; + unsigned char resvport, reuseport; int ret = 0; rcu_read_lock(); @@ -2918,6 +2921,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, return -EAGAIN; } resvport = xprt->resvport; + reuseport = xprt->reuseport; connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; rcu_read_unlock(); @@ -2928,6 +2932,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, goto out_put_switch; } xprt->resvport = resvport; + xprt->reuseport = reuseport; if (xprt->ops->set_connect_timeout != NULL) xprt->ops->set_connect_timeout(xprt, connect_timeout, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 70e52f567b2a..98e2d40b2d6a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1752,7 +1752,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) { - if (transport->srcport == 0) + if (transport->srcport == 0 && transport->xprt.reuseport) transport->srcport = xs_sock_getport(sock); } -- cgit v1.2.3 From 634d811c619b0dbe16dc890a53d2c978e9d055d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Nov 2019 14:59:02 -0500 Subject: nfsv4: Move NFSPROC4_CLNT_COPY_NOTIFY to end of list We shouldn't insert things into the NFSPROC4_CLNT enums, since that causes the nfsstat array to be reordered. Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5e7a5261af4e..82d8fb422092 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,10 +537,11 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, + + NFSPROC4_CLNT_COPY_NOTIFY, }; /* nfs41 types */ -- cgit v1.2.3 From a264abad51d8ecb7954a2f6d9f1885b38daffc74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Nov 2019 16:25:52 -0500 Subject: SUNRPC: Capture completion of all RPC tasks RPC tasks on the backchannel never invoke xprt_complete_rqst(), so there is no way to report their tk_status at completion. Also, any RPC task that exits via rpc_exit_task() before it is replied to will also disappear without a trace. Introduce a trace point that is symmetrical with rpc_task_begin that captures the termination status of each RPC task. Sample trace output for callback requests initiated on the server: kworker/u8:12-448 [003] 127.025240: rpc_task_end: task:50@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [002] 127.567310: rpc_task_end: task:51@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [001] 130.506817: rpc_task_end: task:52@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task Odd, though, that I never see trace_rpc_task_complete, either in the forward or backchannel. Should it be removed? Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 378233fe5ac7..205bf28bcada 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -165,6 +165,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(end); DECLARE_EVENT_CLASS(rpc_task_queued, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 987c4b1f0b17..9c79548c6847 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -824,6 +824,7 @@ rpc_reset_task_statistics(struct rpc_task *task) */ void rpc_exit_task(struct rpc_task *task) { + trace_rpc_task_end(task, task->tk_action); task->tk_action = NULL; if (task->tk_ops->rpc_count_stats) task->tk_ops->rpc_count_stats(task, task->tk_calldata); -- cgit v1.2.3