diff options
Diffstat (limited to 'net')
51 files changed, 1956 insertions, 1060 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 776618cd2be..b23a17c431c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -740,10 +740,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } +again: /* Wait for the response */ err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + goto again; + } + if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; @@ -1420,6 +1428,7 @@ int p9_client_clunk(struct p9_fid *fid) int err; struct p9_client *clnt; struct p9_req_t *req; + int retries = 0; if (!fid) { pr_warn("%s (%d): Trying to clunk with NULL fid\n", @@ -1428,7 +1437,9 @@ int p9_client_clunk(struct p9_fid *fid) return 0; } - p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); +again: + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; clnt = fid->clnt; @@ -1444,8 +1455,14 @@ int p9_client_clunk(struct p9_fid *fid) error: /* * Fid is not valid even after a failed clunk + * If interrupted, retry once then give up and + * leak fid until umount. */ - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) { + if (retries++ == 0) + goto again; + } else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1470,7 +1487,10 @@ int p9_client_remove(struct p9_fid *fid) p9_free_req(clnt, req); error: - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) + p9_client_clunk(fid); + else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 761ad9d6cc3..cc913193d99 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -201,7 +201,9 @@ enum { Opt_ip, Opt_last_string, /* string args above */ + Opt_share, Opt_noshare, + Opt_crc, Opt_nocrc, }; @@ -217,7 +219,9 @@ static match_table_t opt_tokens = { {Opt_key, "key=%s"}, {Opt_ip, "ip=%s"}, /* string args above */ + {Opt_share, "share"}, {Opt_noshare, "noshare"}, + {Opt_crc, "crc"}, {Opt_nocrc, "nocrc"}, {-1, NULL} }; @@ -277,10 +281,11 @@ out: return err; } -int ceph_parse_options(struct ceph_options **popt, char *options, - const char *dev_name, const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private) +struct ceph_options * +ceph_parse_options(char *options, const char *dev_name, + const char *dev_name_end, + int (*parse_extra_token)(char *c, void *private), + void *private) { struct ceph_options *opt; const char *c; @@ -289,7 +294,7 @@ int ceph_parse_options(struct ceph_options **popt, char *options, opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) - return err; + return ERR_PTR(-ENOMEM); opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), GFP_KERNEL); if (!opt->mon_addr) @@ -398,10 +403,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options, opt->mount_timeout = intval; break; + case Opt_share: + opt->flags &= ~CEPH_OPT_NOSHARE; + break; case Opt_noshare: opt->flags |= CEPH_OPT_NOSHARE; break; + case Opt_crc: + opt->flags &= ~CEPH_OPT_NOCRC; + break; case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; @@ -412,12 +423,11 @@ int ceph_parse_options(struct ceph_options **popt, char *options, } /* success */ - *popt = opt; - return 0; + return opt; out: ceph_destroy_options(opt); - return err; + return ERR_PTR(err); } EXPORT_SYMBOL(ceph_parse_options); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad5b70801f3..f0993af2ae4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -38,48 +38,54 @@ static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; static struct lock_class_key socket_class; #endif +/* + * When skipping (ignoring) a block of input we read it into a "skip + * buffer," which is this many bytes in size. + */ +#define SKIP_BUF_SIZE 1024 static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); static void ceph_fault(struct ceph_connection *con); /* - * nicely render a sockaddr as a string. + * Nicely render a sockaddr as a string. An array of formatted + * strings is used, to approximate reentrancy. */ -#define MAX_ADDR_STR 20 -#define MAX_ADDR_STR_LEN 60 -static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; -static DEFINE_SPINLOCK(addr_str_lock); -static int last_addr_str; +#define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */ +#define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG) +#define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1) +#define MAX_ADDR_STR_LEN 64 /* 54 is enough */ + +static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN]; +static atomic_t addr_str_seq = ATOMIC_INIT(0); + +static struct page *zero_page; /* used in certain error cases */ const char *ceph_pr_addr(const struct sockaddr_storage *ss) { int i; char *s; - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; - - spin_lock(&addr_str_lock); - i = last_addr_str++; - if (last_addr_str == MAX_ADDR_STR) - last_addr_str = 0; - spin_unlock(&addr_str_lock); + struct sockaddr_in *in4 = (struct sockaddr_in *) ss; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; + + i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; s = addr_str[i]; switch (ss->ss_family) { case AF_INET: - snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, - (unsigned int)ntohs(in4->sin_port)); + snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, + ntohs(in4->sin_port)); break; case AF_INET6: - snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, - (unsigned int)ntohs(in6->sin6_port)); + snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, + ntohs(in6->sin6_port)); break; default: - snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", - (int)ss->ss_family); + snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", + ss->ss_family); } return s; @@ -95,22 +101,43 @@ static void encode_my_addr(struct ceph_messenger *msgr) /* * work queue for all reading and writing to/from the socket. */ -struct workqueue_struct *ceph_msgr_wq; +static struct workqueue_struct *ceph_msgr_wq; + +void _ceph_msgr_exit(void) +{ + if (ceph_msgr_wq) { + destroy_workqueue(ceph_msgr_wq); + ceph_msgr_wq = NULL; + } + + BUG_ON(zero_page == NULL); + kunmap(zero_page); + page_cache_release(zero_page); + zero_page = NULL; +} int ceph_msgr_init(void) { + BUG_ON(zero_page != NULL); + zero_page = ZERO_PAGE(0); + page_cache_get(zero_page); + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); - if (!ceph_msgr_wq) { - pr_err("msgr_init failed to create workqueue\n"); - return -ENOMEM; - } - return 0; + if (ceph_msgr_wq) + return 0; + + pr_err("msgr_init failed to create workqueue\n"); + _ceph_msgr_exit(); + + return -ENOMEM; } EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { - destroy_workqueue(ceph_msgr_wq); + BUG_ON(ceph_msgr_wq == NULL); + + _ceph_msgr_exit(); } EXPORT_SYMBOL(ceph_msgr_exit); @@ -128,8 +155,8 @@ EXPORT_SYMBOL(ceph_msgr_flush); /* data available on socket, or listen socket received a connect */ static void ceph_data_ready(struct sock *sk, int count_unused) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; + if (sk->sk_state != TCP_CLOSE_WAIT) { dout("ceph_data_ready on %p state = %lu, queueing work\n", con, con->state); @@ -140,26 +167,30 @@ static void ceph_data_ready(struct sock *sk, int count_unused) /* socket has buffer space for writing */ static void ceph_write_space(struct sock *sk) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; - /* only queue to workqueue if there is data we want to write. */ + /* only queue to workqueue if there is data we want to write, + * and there is sufficient space in the socket buffer to accept + * more data. clear SOCK_NOSPACE so that ceph_write_space() + * doesn't get called again until try_write() fills the socket + * buffer. See net/ipv4/tcp_input.c:tcp_check_space() + * and net/core/stream.c:sk_stream_write_space(). + */ if (test_bit(WRITE_PENDING, &con->state)) { - dout("ceph_write_space %p queueing write work\n", con); - queue_con(con); + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + dout("ceph_write_space %p queueing write work\n", con); + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + queue_con(con); + } } else { dout("ceph_write_space %p nothing to write\n", con); } - - /* since we have our own write_space, clear the SOCK_NOSPACE flag */ - clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); } /* socket's state has changed */ static void ceph_state_change(struct sock *sk) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; dout("ceph_state_change %p state = %lu sk_state = %u\n", con, con->state, sk->sk_state); @@ -184,6 +215,8 @@ static void ceph_state_change(struct sock *sk) dout("ceph_state_change TCP_ESTABLISHED\n"); queue_con(con); break; + default: /* Everything else is uninteresting */ + break; } } @@ -194,7 +227,7 @@ static void set_sock_callbacks(struct socket *sock, struct ceph_connection *con) { struct sock *sk = sock->sk; - sk->sk_user_data = (void *)con; + sk->sk_user_data = con; sk->sk_data_ready = ceph_data_ready; sk->sk_write_space = ceph_write_space; sk->sk_state_change = ceph_state_change; @@ -208,7 +241,7 @@ static void set_sock_callbacks(struct socket *sock, /* * initiate connection to a remote socket. */ -static struct socket *ceph_tcp_connect(struct ceph_connection *con) +static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; @@ -218,8 +251,7 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) - return ERR_PTR(ret); - con->sock = sock; + return ret; sock->sk->sk_allocation = GFP_NOFS; #ifdef CONFIG_LOCKDEP @@ -236,19 +268,17 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr.in_addr), sock->sk->sk_state); - ret = 0; - } - if (ret < 0) { + } else if (ret < 0) { pr_err("connect %s error %d\n", ceph_pr_addr(&con->peer_addr.in_addr), ret); sock_release(sock); - con->sock = NULL; con->error_msg = "connect error"; + + return ret; } + con->sock = sock; - if (ret < 0) - return ERR_PTR(ret); - return sock; + return 0; } static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) @@ -284,6 +314,19 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, return r; } +static int ceph_tcp_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int more) +{ + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); + int ret; + + ret = kernel_sendpage(sock, page, offset, size, flags); + if (ret == -EAGAIN) + ret = 0; + + return ret; +} + /* * Shutdown/close the socket for the given connection. @@ -391,22 +434,23 @@ bool ceph_con_opened(struct ceph_connection *con) */ struct ceph_connection *ceph_con_get(struct ceph_connection *con) { - dout("con_get %p nref = %d -> %d\n", con, - atomic_read(&con->nref), atomic_read(&con->nref) + 1); - if (atomic_inc_not_zero(&con->nref)) - return con; - return NULL; + int nref = __atomic_add_unless(&con->nref, 1, 0); + + dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); + + return nref ? con : NULL; } void ceph_con_put(struct ceph_connection *con) { - dout("con_put %p nref = %d -> %d\n", con, - atomic_read(&con->nref), atomic_read(&con->nref) - 1); - BUG_ON(atomic_read(&con->nref) == 0); - if (atomic_dec_and_test(&con->nref)) { + int nref = atomic_dec_return(&con->nref); + + BUG_ON(nref < 0); + if (nref == 0) { BUG_ON(con->sock); kfree(con); } + dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); } /* @@ -442,14 +486,35 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) return ret; } +static void ceph_con_out_kvec_reset(struct ceph_connection *con) +{ + con->out_kvec_left = 0; + con->out_kvec_bytes = 0; + con->out_kvec_cur = &con->out_kvec[0]; +} + +static void ceph_con_out_kvec_add(struct ceph_connection *con, + size_t size, void *data) +{ + int index; + + index = con->out_kvec_left; + BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); + + con->out_kvec[index].iov_len = size; + con->out_kvec[index].iov_base = data; + con->out_kvec_left++; + con->out_kvec_bytes += size; +} /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ -static void prepare_write_message_footer(struct ceph_connection *con, int v) +static void prepare_write_message_footer(struct ceph_connection *con) { struct ceph_msg *m = con->out_msg; + int v = con->out_kvec_left; dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; @@ -467,9 +532,9 @@ static void prepare_write_message_footer(struct ceph_connection *con, int v) static void prepare_write_message(struct ceph_connection *con) { struct ceph_msg *m; - int v = 0; + u32 crc; - con->out_kvec_bytes = 0; + ceph_con_out_kvec_reset(con); con->out_kvec_is_msg = true; con->out_msg_done = false; @@ -477,16 +542,13 @@ static void prepare_write_message(struct ceph_connection *con) * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; - con->out_kvec[v].iov_base = &tag_ack; - con->out_kvec[v++].iov_len = 1; + ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - con->out_kvec[v].iov_base = &con->out_temp_ack; - con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); - con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); + ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); } - m = list_first_entry(&con->out_queue, - struct ceph_msg, list_head); + m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); con->out_msg = m; /* put message on sent list */ @@ -510,30 +572,26 @@ static void prepare_write_message(struct ceph_connection *con) BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ - con->out_kvec[v].iov_base = &tag_msg; - con->out_kvec[v++].iov_len = 1; - con->out_kvec[v].iov_base = &m->hdr; - con->out_kvec[v++].iov_len = sizeof(m->hdr); - con->out_kvec[v++] = m->front; + ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); + ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); + if (m->middle) - con->out_kvec[v++] = m->middle->vec; - con->out_kvec_left = v; - con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + - (m->middle ? m->middle->vec.iov_len : 0); - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_add(con, m->middle->vec.iov_len, + m->middle->vec.iov_base); /* fill in crc (except data pages), footer */ - con->out_msg->hdr.crc = - cpu_to_le32(crc32c(0, (void *)&m->hdr, - sizeof(m->hdr) - sizeof(m->hdr.crc))); + crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); + con->out_msg->hdr.crc = cpu_to_le32(crc); con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; - con->out_msg->footer.front_crc = - cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); - if (m->middle) - con->out_msg->footer.middle_crc = - cpu_to_le32(crc32c(0, m->middle->vec.iov_base, - m->middle->vec.iov_len)); - else + + crc = crc32c(0, m->front.iov_base, m->front.iov_len); + con->out_msg->footer.front_crc = cpu_to_le32(crc); + if (m->middle) { + crc = crc32c(0, m->middle->vec.iov_base, + m->middle->vec.iov_len); + con->out_msg->footer.middle_crc = cpu_to_le32(crc); + } else con->out_msg->footer.middle_crc = 0; con->out_msg->footer.data_crc = 0; dout("prepare_write_message front_crc %u data_crc %u\n", @@ -549,11 +607,11 @@ static void prepare_write_message(struct ceph_connection *con) else con->out_msg_pos.page_pos = 0; con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = 0; + con->out_msg_pos.did_page_crc = false; con->out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ - prepare_write_message_footer(con, v); + prepare_write_message_footer(con); } set_bit(WRITE_PENDING, &con->state); @@ -568,14 +626,14 @@ static void prepare_write_ack(struct ceph_connection *con) con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; - con->out_kvec[0].iov_base = &tag_ack; - con->out_kvec[0].iov_len = 1; + ceph_con_out_kvec_reset(con); + + ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - con->out_kvec[1].iov_base = &con->out_temp_ack; - con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); - con->out_kvec_left = 2; - con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + con->out_more = 1; /* more will follow.. eventually.. */ set_bit(WRITE_PENDING, &con->state); } @@ -586,11 +644,8 @@ static void prepare_write_ack(struct ceph_connection *con) static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); - con->out_kvec[0].iov_base = &tag_keepalive; - con->out_kvec[0].iov_len = 1; - con->out_kvec_left = 1; - con->out_kvec_bytes = 1; - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); set_bit(WRITE_PENDING, &con->state); } @@ -619,12 +674,9 @@ static int prepare_connect_authorizer(struct ceph_connection *con) con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); con->out_connect.authorizer_len = cpu_to_le32(auth_len); - if (auth_len) { - con->out_kvec[con->out_kvec_left].iov_base = auth_buf; - con->out_kvec[con->out_kvec_left].iov_len = auth_len; - con->out_kvec_left++; - con->out_kvec_bytes += auth_len; - } + if (auth_len) + ceph_con_out_kvec_add(con, auth_len, auth_buf); + return 0; } @@ -634,22 +686,18 @@ static int prepare_connect_authorizer(struct ceph_connection *con) static void prepare_write_banner(struct ceph_messenger *msgr, struct ceph_connection *con) { - int len = strlen(CEPH_BANNER); + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); + ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr), + &msgr->my_enc_addr); - con->out_kvec[0].iov_base = CEPH_BANNER; - con->out_kvec[0].iov_len = len; - con->out_kvec[1].iov_base = &msgr->my_enc_addr; - con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); - con->out_kvec_left = 2; - con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); - con->out_kvec_cur = con->out_kvec; con->out_more = 0; set_bit(WRITE_PENDING, &con->state); } static int prepare_write_connect(struct ceph_messenger *msgr, struct ceph_connection *con, - int after_banner) + int include_banner) { unsigned global_seq = get_global_seq(con->msgr, 0); int proto; @@ -678,22 +726,18 @@ static int prepare_write_connect(struct ceph_messenger *msgr, con->out_connect.protocol_version = cpu_to_le32(proto); con->out_connect.flags = 0; - if (!after_banner) { - con->out_kvec_left = 0; - con->out_kvec_bytes = 0; - } - con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; - con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); - con->out_kvec_left++; - con->out_kvec_bytes += sizeof(con->out_connect); - con->out_kvec_cur = con->out_kvec; + if (include_banner) + prepare_write_banner(msgr, con); + else + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); + con->out_more = 0; set_bit(WRITE_PENDING, &con->state); return prepare_connect_authorizer(con); } - /* * write as much of pending kvecs to the socket as we can. * 1 -> done @@ -714,17 +758,18 @@ static int write_partial_kvec(struct ceph_connection *con) con->out_kvec_bytes -= ret; if (con->out_kvec_bytes == 0) break; /* done */ - while (ret > 0) { - if (ret >= con->out_kvec_cur->iov_len) { - ret -= con->out_kvec_cur->iov_len; - con->out_kvec_cur++; - con->out_kvec_left--; - } else { - con->out_kvec_cur->iov_len -= ret; - con->out_kvec_cur->iov_base += ret; - ret = 0; - break; - } + + /* account for full iov entries consumed */ + while (ret >= con->out_kvec_cur->iov_len) { + BUG_ON(!con->out_kvec_left); + ret -= con->out_kvec_cur->iov_len; + con->out_kvec_cur++; + con->out_kvec_left--; + } + /* and for a partially-consumed entry */ + if (ret) { + con->out_kvec_cur->iov_len -= ret; + con->out_kvec_cur->iov_base += ret; } } con->out_kvec_left = 0; @@ -773,7 +818,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) struct ceph_msg *msg = con->out_msg; unsigned data_len = le32_to_cpu(msg->hdr.data_len); size_t len; - int crc = con->msgr->nocrc; + bool do_datacrc = !con->msgr->nocrc; int ret; int total_max_write; int in_trail = 0; @@ -790,9 +835,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) while (data_len > con->out_msg_pos.data_pos) { struct page *page = NULL; - void *kaddr = NULL; int max_write = PAGE_SIZE; - int page_shift = 0; + int bio_offset = 0; total_max_write = data_len - trail_len - con->out_msg_pos.data_pos; @@ -811,58 +855,47 @@ static int write_partial_msg_pages(struct ceph_connection *con) page = list_first_entry(&msg->trail->head, struct page, lru); - if (crc) - kaddr = kmap(page); max_write = PAGE_SIZE; } else if (msg->pages) { page = msg->pages[con->out_msg_pos.page]; - if (crc) - kaddr = kmap(page); } else if (msg->pagelist) { page = list_first_entry(&msg->pagelist->head, struct page, lru); - if (crc) - kaddr = kmap(page); #ifdef CONFIG_BLOCK } else if (msg->bio) { struct bio_vec *bv; bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); page = bv->bv_page; - page_shift = bv->bv_offset; - if (crc) - kaddr = kmap(page) + page_shift; + bio_offset = bv->bv_offset; max_write = bv->bv_len; #endif } else { - page = con->msgr->zero_page; - if (crc) - kaddr = page_address(con->msgr->zero_page); + page = zero_page; } len = min_t(int, max_write - con->out_msg_pos.page_pos, total_max_write); - if (crc && !con->out_msg_pos.did_page_crc) { - void *base = kaddr + con->out_msg_pos.page_pos; + if (do_datacrc && !con->out_msg_pos.did_page_crc) { + void *base; + u32 crc; u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); + char *kaddr; + kaddr = kmap(page); BUG_ON(kaddr == NULL); - con->out_msg->footer.data_crc = - cpu_to_le32(crc32c(tmpcrc, base, len)); - con->out_msg_pos.did_page_crc = 1; + base = kaddr + con->out_msg_pos.page_pos + bio_offset; + crc = crc32c(tmpcrc, base, len); + con->out_msg->footer.data_crc = cpu_to_le32(crc); + con->out_msg_pos.did_page_crc = true; } - ret = kernel_sendpage(con->sock, page, - con->out_msg_pos.page_pos + page_shift, - len, - MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_MORE); - - if (crc && - (msg->pages || msg->pagelist || msg->bio || in_trail)) + ret = ceph_tcp_sendpage(con->sock, page, + con->out_msg_pos.page_pos + bio_offset, + len, 1); + + if (do_datacrc) kunmap(page); - if (ret == -EAGAIN) - ret = 0; if (ret <= 0) goto out; @@ -871,7 +904,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) if (ret == len) { con->out_msg_pos.page_pos = 0; con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = 0; + con->out_msg_pos.did_page_crc = false; if (in_trail) list_move_tail(&page->lru, &msg->trail->head); @@ -888,12 +921,10 @@ static int write_partial_msg_pages(struct ceph_connection *con) dout("write_partial_msg_pages %p msg %p done\n", con, msg); /* prepare and queue up footer, too */ - if (!crc) + if (!do_datacrc) con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; - con->out_kvec_bytes = 0; - con->out_kvec_left = 0; - con->out_kvec_cur = con->out_kvec; - prepare_write_message_footer(con, 0); + ceph_con_out_kvec_reset(con); + prepare_write_message_footer(con); ret = 1; out: return ret; @@ -907,12 +938,9 @@ static int write_partial_skip(struct ceph_connection *con) int ret; while (con->out_skip > 0) { - struct kvec iov = { - .iov_base = page_address(con->msgr->zero_page), - .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) - }; + size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); - ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); if (ret <= 0) goto out; con->out_skip -= ret; @@ -1085,8 +1113,8 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, char delim, const char **ipend) { - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; + struct sockaddr_in *in4 = (struct sockaddr_in *) ss; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; memset(ss, 0, sizeof(*ss)); @@ -1512,10 +1540,9 @@ static int read_partial_message_section(struct ceph_connection *con, if (ret <= 0) return ret; section->iov_len += ret; - if (section->iov_len == sec_len) - *crc = crc32c(0, section->iov_base, - section->iov_len); } + if (section->iov_len == sec_len) + *crc = crc32c(0, section->iov_base, section->iov_len); return 1; } @@ -1527,7 +1554,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, static int read_partial_message_pages(struct ceph_connection *con, struct page **pages, - unsigned data_len, int datacrc) + unsigned data_len, bool do_datacrc) { void *p; int ret; @@ -1540,7 +1567,7 @@ static int read_partial_message_pages(struct ceph_connection *con, p = kmap(pages[con->in_msg_pos.page]); ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, left); - if (ret > 0 && datacrc) + if (ret > 0 && do_datacrc) con->in_data_crc = crc32c(con->in_data_crc, p + con->in_msg_pos.page_pos, ret); @@ -1560,7 +1587,7 @@ static int read_partial_message_pages(struct ceph_connection *con, #ifdef CONFIG_BLOCK static int read_partial_message_bio(struct ceph_connection *con, struct bio **bio_iter, int *bio_seg, - unsigned data_len, int datacrc) + unsigned data_len, bool do_datacrc) { struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); void *p; @@ -1576,7 +1603,7 @@ static int read_partial_message_bio(struct ceph_connection *con, ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, left); - if (ret > 0 && datacrc) + if (ret > 0 && do_datacrc) con->in_data_crc = crc32c(con->in_data_crc, p + con->in_msg_pos.page_pos, ret); @@ -1603,9 +1630,10 @@ static int read_partial_message(struct ceph_connection *con) int ret; int to, left; unsigned front_len, middle_len, data_len; - int datacrc = con->msgr->nocrc; + bool do_datacrc = !con->msgr->nocrc; int skip; u64 seq; + u32 crc; dout("read_partial_message con %p msg %p\n", con, m); @@ -1618,17 +1646,16 @@ static int read_partial_message(struct ceph_connection *con) if (ret <= 0) return ret; con->in_base_pos += ret; - if (con->in_base_pos == sizeof(con->in_hdr)) { - u32 crc = crc32c(0, (void *)&con->in_hdr, - sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); - if (crc != le32_to_cpu(con->in_hdr.crc)) { - pr_err("read_partial_message bad hdr " - " crc %u != expected %u\n", - crc, con->in_hdr.crc); - return -EBADMSG; - } - } } + + crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); + if (cpu_to_le32(crc) != con->in_hdr.crc) { + pr_err("read_partial_message bad hdr " + " crc %u != expected %u\n", + crc, con->in_hdr.crc); + return -EBADMSG; + } + front_len = le32_to_cpu(con->in_hdr.front_len); if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; @@ -1714,7 +1741,7 @@ static int read_partial_message(struct ceph_connection *con) while (con->in_msg_pos.data_pos < data_len) { if (m->pages) { ret = read_partial_message_pages(con, m->pages, - data_len, datacrc); + data_len, do_datacrc); if (ret <= 0) return ret; #ifdef CONFIG_BLOCK @@ -1722,7 +1749,7 @@ static int read_partial_message(struct ceph_connection *con) ret = read_partial_message_bio(con, &m->bio_iter, &m->bio_seg, - data_len, datacrc); + data_len, do_datacrc); if (ret <= 0) return ret; #endif @@ -1757,7 +1784,7 @@ static int read_partial_message(struct ceph_connection *con) m, con->in_middle_crc, m->footer.middle_crc); return -EBADMSG; } - if (datacrc && + if (do_datacrc && (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { pr_err("read_partial_message %p data crc %u != exp. %u\n", m, @@ -1819,7 +1846,6 @@ more: /* open the socket first? */ if (con->sock == NULL) { - prepare_write_banner(msgr, con); prepare_write_connect(msgr, con, 1); prepare_read_banner(con); set_bit(CONNECTING, &con->state); @@ -1829,11 +1855,9 @@ more: con->in_tag = CEPH_MSGR_TAG_READY; dout("try_write initiating connect on %p new state %lu\n", con, con->state); - con->sock = ceph_tcp_connect(con); - if (IS_ERR(con->sock)) { - con->sock = NULL; + ret = ceph_tcp_connect(con); + if (ret < 0) { con->error_msg = "connect error"; - ret = -1; goto out; } } @@ -1953,8 +1977,9 @@ more: * * FIXME: there must be a better way to do this! */ - static char buf[1024]; - int skip = min(1024, -con->in_base_pos); + static char buf[SKIP_BUF_SIZE]; + int skip = min((int) sizeof (buf), -con->in_base_pos); + dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); ret = ceph_tcp_recvmsg(con->sock, buf, skip); if (ret <= 0) @@ -2216,15 +2241,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, spin_lock_init(&msgr->global_seq_lock); - /* the zero page is needed if a request is "canceled" while the message - * is being written over the socket */ - msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); - if (!msgr->zero_page) { - kfree(msgr); - return ERR_PTR(-ENOMEM); - } - kmap(msgr->zero_page); - if (myaddr) msgr->inst.addr = *myaddr; @@ -2241,8 +2257,6 @@ EXPORT_SYMBOL(ceph_messenger_create); void ceph_messenger_destroy(struct ceph_messenger *msgr) { dout("destroy %p\n", msgr); - kunmap(msgr->zero_page); - __free_page(msgr->zero_page); kfree(msgr); dout("destroyed messenger %p\n", msgr); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index fd863fe7693..29ad46ec9dc 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -283,7 +283,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) ceph_decode_32_safe(p, end, yes, bad); #if BITS_PER_LONG == 32 err = -EINVAL; - if (yes > ULONG_MAX / sizeof(struct crush_rule_step)) + if (yes > (ULONG_MAX - sizeof(*r)) + / sizeof(struct crush_rule_step)) goto bad; #endif r = c->rules[i] = kmalloc(sizeof(*r) + diff --git a/net/core/dev.c b/net/core/dev.c index 926411b381a..5d59155adf2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3559,7 +3559,8 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { __skb_pull(skb, skb_headlen(skb)); - skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + /* restore the reserve we had after netdev_alloc_skb_ip_align() */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e602a3104e..f223cdc75da 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -320,12 +320,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, EXPORT_SYMBOL(__netdev_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size) + int size, unsigned int truesize) { skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; - skb->truesize += size; + skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 39e0c16d54c..6e447ff94df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1078,6 +1078,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev, return addr; } +EXPORT_SYMBOL(inet_confirm_addr); /* * Device notifier diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 0e58f09e59f..851acec852d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static bool forward = NF_ACCEPT; +static bool forward = true; module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) @@ -64,7 +64,7 @@ static int __net_init iptable_filter_net_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ipt_standard *)repl->entries)[1].target.verdict = - -forward - 1; + forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, repl); @@ -88,11 +88,6 @@ static int __init iptable_filter_init(void) { int ret; - if (forward < 0 || forward > NF_MAX_VERDICT) { - pr_err("iptables forward must be 0 or 1\n"); - return -EINVAL; - } - ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index a8f6da97e3b..325e59a0224 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static bool forward = NF_ACCEPT; +static bool forward = true; module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) @@ -56,7 +56,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ip6t_standard *)repl->entries)[1].target.verdict = - -forward - 1; + forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; net->ipv6.ip6table_filter = ip6t_register_table(net, &packet_filter, repl); @@ -80,11 +80,6 @@ static int __init ip6table_filter_init(void) { int ret; - if (forward < 0 || forward > NF_MAX_VERDICT) { - pr_err("iptables forward must be 0 or 1\n"); - return -EINVAL; - } - ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 24c456e8aa1..496b62712fe 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2474,8 +2474,12 @@ static int rt6_fill_node(struct net *net, rcu_read_lock(); n = dst_get_neighbour_noref(&rt->dst); - if (n) - NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + if (n) { + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + } rcu_read_unlock(); if (rt->dst.dev) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 9b071910b4b..1addd9f3f40 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1845,3 +1845,4 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); +MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7b48035826e..cbdb754dbb1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -768,8 +768,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, - unsigned int dataoff, u32 hash, - unsigned int *timeouts) + unsigned int dataoff, u32 hash) { struct nf_conn *ct; struct nf_conn_help *help; @@ -777,6 +776,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + struct nf_conn_timeout *timeout_ext; + unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); @@ -788,12 +789,21 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; + timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; + if (timeout_ext) + timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); + else + timeouts = l4proto->get_timeouts(net); + if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; } + if (timeout_ext) + nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); @@ -854,8 +864,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, int *set_reply, - enum ip_conntrack_info *ctinfo, - unsigned int *timeouts) + enum ip_conntrack_info *ctinfo) { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; @@ -875,7 +884,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, - skb, dataoff, hash, timeouts); + skb, dataoff, hash); if (!h) return NULL; if (IS_ERR(h)) @@ -964,19 +973,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } - /* Decide what timeout policy we want to apply to this flow. */ - if (tmpl) { - timeout_ext = nf_ct_timeout_find(tmpl); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); - } else - timeouts = l4proto->get_timeouts(net); - ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo, - timeouts); + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); @@ -993,6 +991,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); + /* Decide what timeout policy we want to apply to this flow. */ + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); + else + timeouts = l4proto->get_timeouts(net); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5701c8dd783..be3da2c8cdc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -127,6 +127,27 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); +struct nf_conntrack_l4proto * +nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) +{ + struct nf_conntrack_l4proto *p; + + rcu_read_lock(); + p = __nf_ct_l4proto_find(l3num, l4num); + if (!try_module_get(p->me)) + p = &nf_conntrack_l4proto_generic; + rcu_read_unlock(); + + return p; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); + +void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); + static int kill_l3proto(struct nf_conn *i, void *data) { return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index fec29a43de4..2b9e79f5ef0 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -98,11 +98,13 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - l4proto = __nf_ct_l4proto_find(l3num, l4num); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); /* This protocol is not supportted, skip. */ - if (l4proto->l4proto != l4num) - return -EOPNOTSUPP; + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err_proto_put; + } if (matching) { if (nlh->nlmsg_flags & NLM_F_REPLACE) { @@ -110,20 +112,25 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, * different kind, sorry. */ if (matching->l3num != l3num || - matching->l4num != l4num) - return -EINVAL; + matching->l4proto->l4proto != l4num) { + ret = -EINVAL; + goto err_proto_put; + } ret = ctnl_timeout_parse_policy(matching, l4proto, cda[CTA_TIMEOUT_DATA]); return ret; } - return -EBUSY; + ret = -EBUSY; + goto err_proto_put; } timeout = kzalloc(sizeof(struct ctnl_timeout) + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); - if (timeout == NULL) - return -ENOMEM; + if (timeout == NULL) { + ret = -ENOMEM; + goto err_proto_put; + } ret = ctnl_timeout_parse_policy(timeout, l4proto, cda[CTA_TIMEOUT_DATA]); @@ -132,13 +139,15 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->l3num = l3num; - timeout->l4num = l4num; + timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &cttimeout_list); return 0; err: kfree(timeout); +err_proto_put: + nf_ct_l4proto_put(l4proto); return ret; } @@ -149,7 +158,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; unsigned int flags = pid ? NLM_F_MULTI : 0; - struct nf_conntrack_l4proto *l4proto; + struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); @@ -163,20 +172,10 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name); NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)); - NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4num); + NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto); NLA_PUT_BE32(skb, CTA_TIMEOUT_USE, htonl(atomic_read(&timeout->refcnt))); - l4proto = __nf_ct_l4proto_find(timeout->l3num, timeout->l4num); - - /* If the timeout object does not match the layer 4 protocol tracker, - * then skip dumping the data part since we don't know how to - * interpret it. This may happen for UPDlite, SCTP and DCCP since - * you can unload the module. - */ - if (timeout->l4num != l4proto->l4proto) - goto out; - if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { struct nlattr *nest_parms; int ret; @@ -192,7 +191,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, nla_nest_end(skb, nest_parms); } -out: + nlmsg_end(skb, nlh); return skb->len; @@ -293,6 +292,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) if (atomic_dec_and_test(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); + nf_ct_l4proto_put(timeout->l4proto); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -417,6 +417,7 @@ static void __exit cttimeout_exit(void) /* We are sure that our objects have no clients at this point, * it's safe to release them all without checking refcnt. */ + nf_ct_l4proto_put(cur->l4proto); kfree_rcu(cur, rcu_head); } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b873445df44..0c8e43810ce 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -14,8 +14,10 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CT.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -217,50 +219,59 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; + rcu_read_lock(); timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (timeout_find_get) { const struct ipt_entry *e = par->entryinfo; + struct nf_conntrack_l4proto *l4proto; if (e->ip.invflags & IPT_INV_PROTO) { ret = -EINVAL; pr_info("You cannot use inversion on " "L4 protocol\n"); - goto err3; + goto err4; } timeout = timeout_find_get(info->timeout); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", info->timeout); - goto err3; + goto err4; } if (timeout->l3num != par->family) { ret = -EINVAL; pr_info("Timeout policy `%s' can only be " "used by L3 protocol number %d\n", info->timeout, timeout->l3num); - goto err3; + goto err4; } - if (timeout->l4num != e->ip.proto) { + /* Make sure the timeout policy matches any existing + * protocol tracker, otherwise default to generic. + */ + l4proto = __nf_ct_l4proto_find(par->family, + e->ip.proto); + if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; pr_info("Timeout policy `%s' can only be " "used by L4 protocol number %d\n", - info->timeout, timeout->l4num); - goto err3; + info->timeout, + timeout->l4proto->l4proto); + goto err4; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_KERNEL); if (timeout_ext == NULL) { ret = -ENOMEM; - goto err3; + goto err4; } } else { ret = -ENOENT; pr_info("Timeout policy base is empty\n"); - goto err3; + goto err4; } + rcu_read_unlock(); } #endif @@ -270,6 +281,8 @@ out: info->ct = ct; return 0; +err4: + rcu_read_unlock(); err3: nf_conntrack_free(ct); err2: @@ -311,6 +324,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_l3proto_module_put(par->family); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT + rcu_read_lock(); timeout_put = rcu_dereference(nf_ct_timeout_put_hook); if (timeout_put) { @@ -318,6 +332,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) if (timeout_ext) timeout_put(timeout_ext->timeout); } + rcu_read_unlock(); #endif } nf_ct_put(info->ct); diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index f99f8dee238..ff5f75fddb1 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -480,7 +480,7 @@ ipt_log_packet(u_int8_t pf, sb_close(m); } -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) /* One level of recursion won't kill us */ static void dump_ipv6_packet(struct sbuff *m, const struct nf_loginfo *info, @@ -824,7 +824,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) if (par->family == NFPROTO_IPV4) ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); @@ -864,7 +864,7 @@ static struct xt_target log_tg_regs[] __read_mostly = { .checkentry = log_tg_check, .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "LOG", .family = NFPROTO_IPV6, @@ -882,7 +882,7 @@ static struct nf_logger ipt_log_logger __read_mostly = { .me = THIS_MODULE, }; -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static struct nf_logger ip6t_log_logger __read_mostly = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, @@ -899,7 +899,7 @@ static int __init log_tg_init(void) return ret; nf_log_register(NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); #endif return 0; @@ -908,7 +908,7 @@ static int __init log_tg_init(void) static void __exit log_tg_exit(void) { nf_log_unregister(&ipt_log_logger); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_unregister(&ip6t_log_logger); #endif xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 2560e7b441c..7c94aedd091 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -597,7 +597,7 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, iter = iter->next; iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; } - ret_val = netlbl_secattr_catmap_setbit(iter, spot, GFP_ATOMIC); + ret_val = netlbl_secattr_catmap_setbit(iter, spot, flags); } return ret_val; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 51c868923f6..a1e11627747 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -749,7 +749,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) int ret; /* XXX too lazy? */ - ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); + ic = kzalloc(sizeof(struct rds_ib_connection), gfp); if (!ic) return -ENOMEM; diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 9556d2895f7..a91e1db62ee 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -694,7 +694,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) unsigned long flags; /* XXX too lazy? */ - ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); + ic = kzalloc(sizeof(struct rds_iw_connection), gfp); if (!ic) return -ENOMEM; diff --git a/net/rds/loop.c b/net/rds/loop.c index 87ff2a8a454..6b12b68541a 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -121,7 +121,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) struct rds_loop_connection *lc; unsigned long flags; - lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); + lc = kzalloc(sizeof(struct rds_loop_connection), gfp); if (!lc) return -ENOMEM; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 354760ebbbd..f974961754c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -29,6 +29,7 @@ #include <linux/rfkill.h> #include <linux/sched.h> #include <linux/spinlock.h> +#include <linux/device.h> #include <linux/miscdevice.h> #include <linux/wait.h> #include <linux/poll.h> diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index ffd243d0918..9fe8857d8d5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -39,3 +39,16 @@ config RPCSEC_GSS_KRB5 Kerberos support should be installed. If unsure, say Y. + +config SUNRPC_DEBUG + bool "RPC: Enable dprintk debugging" + depends on SUNRPC && SYSCTL + help + This option enables a sysctl-based debugging interface + that is be used by the 'rpcdebug' utility to turn on or off + logging of different aspects of the kernel RPC activity. + + Disabling this option will make your kernel slightly smaller, + but makes troubleshooting NFS issues significantly harder. + + If unsure, say Y. diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index ee77742e0ed..d11418f97f1 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -156,8 +156,9 @@ static size_t rpc_pton4(const char *buf, const size_t buflen, } #if IS_ENABLED(CONFIG_IPV6) -static int rpc_parse_scope_id(const char *buf, const size_t buflen, - const char *delim, struct sockaddr_in6 *sin6) +static int rpc_parse_scope_id(struct net *net, const char *buf, + const size_t buflen, const char *delim, + struct sockaddr_in6 *sin6) { char *p; size_t len; @@ -177,7 +178,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen, unsigned long scope_id = 0; struct net_device *dev; - dev = dev_get_by_name(&init_net, p); + dev = dev_get_by_name(net, p); if (dev != NULL) { scope_id = dev->ifindex; dev_put(dev); @@ -197,7 +198,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen, return 0; } -static size_t rpc_pton6(const char *buf, const size_t buflen, +static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; @@ -213,14 +214,14 @@ static size_t rpc_pton6(const char *buf, const size_t buflen, if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) return 0; - if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6)) return 0; sin6->sin6_family = AF_INET6; return sizeof(struct sockaddr_in6); } #else -static size_t rpc_pton6(const char *buf, const size_t buflen, +static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { return 0; @@ -229,6 +230,7 @@ static size_t rpc_pton6(const char *buf, const size_t buflen, /** * rpc_pton - Construct a sockaddr in @sap + * @net: applicable network namespace * @buf: C string containing presentation format IP address * @buflen: length of presentation address in bytes * @sap: buffer into which to plant socket address @@ -241,14 +243,14 @@ static size_t rpc_pton6(const char *buf, const size_t buflen, * socket address, if successful. Returns zero if an error * occurred. */ -size_t rpc_pton(const char *buf, const size_t buflen, +size_t rpc_pton(struct net *net, const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { unsigned int i; for (i = 0; i < buflen; i++) if (buf[i] == ':') - return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton6(net, buf, buflen, sap, salen); return rpc_pton4(buf, buflen, sap, salen); } EXPORT_SYMBOL_GPL(rpc_pton); @@ -295,6 +297,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @net: applicable network namespace * @uaddr: C string containing universal address to convert * @uaddr_len: length of universal address string * @sap: buffer into which to plant socket address @@ -306,8 +309,9 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) * Returns the size of the socket address if successful; otherwise * zero is returned. */ -size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, - struct sockaddr *sap, const size_t salen) +size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, + const size_t uaddr_len, struct sockaddr *sap, + const size_t salen) { char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; unsigned long portlo, porthi; @@ -339,7 +343,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, port = (unsigned short)((porthi << 8) | portlo); *c = '\0'; - if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0) return 0; switch (sap->sa_family) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index affa631ac1a..d3ad81f8da5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -81,7 +81,7 @@ struct gss_auth { * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ - struct dentry *dentry[2]; + struct rpc_pipe *pipe[2]; }; /* pipe_version >= 0 if and only if someone has a pipe open. */ @@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx) /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context - * and a new one is protected by the inode->i_lock. + * and a new one is protected by the pipe->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) @@ -251,7 +251,7 @@ struct gss_upcall_msg { struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; - struct rpc_inode *inode; + struct rpc_pipe *pipe; struct rpc_wait_queue rpc_waitqueue; wait_queue_head_t waitqueue; struct gss_cl_ctx *ctx; @@ -294,10 +294,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) +__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &rpci->in_downcall, list) { + list_for_each_entry(pos, &pipe->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -315,18 +315,17 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { - struct rpc_inode *rpci = gss_msg->inode; - struct inode *inode = &rpci->vfs_inode; + struct rpc_pipe *pipe = gss_msg->pipe; struct gss_upcall_msg *old; - spin_lock(&inode->i_lock); - old = __gss_find_upcall(rpci, gss_msg->uid); + spin_lock(&pipe->lock); + old = __gss_find_upcall(pipe, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &rpci->in_downcall); + list_add(&gss_msg->list, &pipe->in_downcall); } else gss_msg = old; - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); return gss_msg; } @@ -342,14 +341,14 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg) static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - struct inode *inode = &gss_msg->inode->vfs_inode; + struct rpc_pipe *pipe = gss_msg->pipe; if (list_empty(&gss_msg->list)) return; - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); } static void @@ -376,11 +375,11 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; - struct inode *inode = &gss_msg->inode->vfs_inode; + struct rpc_pipe *pipe = gss_msg->pipe; - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); gss_handle_downcall_result(gss_cred, gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -450,7 +449,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, kfree(gss_msg); return ERR_PTR(vers); } - gss_msg->inode = RPC_I(gss_auth->dentry[vers]->d_inode); + gss_msg->pipe = gss_auth->pipe[vers]; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -474,8 +473,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - struct inode *inode = &gss_new->inode->vfs_inode; - int res = rpc_queue_upcall(inode, &gss_new->msg); + int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); gss_msg = ERR_PTR(res); @@ -506,7 +504,7 @@ gss_refresh_upcall(struct rpc_task *task) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; - struct inode *inode; + struct rpc_pipe *pipe; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -524,8 +522,8 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - inode = &gss_msg->inode->vfs_inode; - spin_lock(&inode->i_lock); + pipe = gss_msg->pipe; + spin_lock(&pipe->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -538,7 +536,7 @@ gss_refresh_upcall(struct rpc_task *task) gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -549,7 +547,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { - struct inode *inode; + struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -573,14 +571,14 @@ retry: err = PTR_ERR(gss_msg); goto out; } - inode = &gss_msg->inode->vfs_inode; + pipe = gss_msg->pipe; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; @@ -591,7 +589,7 @@ retry: gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -609,7 +607,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct inode *inode = filp->f_path.dentry->d_inode; + struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; struct gss_cl_ctx *ctx; uid_t uid; ssize_t err = -EFBIG; @@ -639,14 +637,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - spin_lock(&inode->i_lock); - gss_msg = __gss_find_upcall(RPC_I(inode), uid); + spin_lock(&pipe->lock); + gss_msg = __gss_find_upcall(pipe, uid); if (gss_msg == NULL) { - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { @@ -674,9 +672,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = mlen; err_release_msg: - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); @@ -722,23 +720,23 @@ static int gss_pipe_open_v1(struct inode *inode) static void gss_pipe_release(struct inode *inode) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; restart: - spin_lock(&inode->i_lock); - list_for_each_entry(gss_msg, &rpci->in_downcall, list) { + spin_lock(&pipe->lock); + list_for_each_entry(gss_msg, &pipe->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); gss_release_msg(gss_msg); goto restart; } - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); put_pipe_version(); } @@ -759,6 +757,75 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) } } +static void gss_pipes_dentries_destroy(struct rpc_auth *auth) +{ + struct gss_auth *gss_auth; + + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + if (gss_auth->pipe[0]->dentry) + rpc_unlink(gss_auth->pipe[0]->dentry); + if (gss_auth->pipe[1]->dentry) + rpc_unlink(gss_auth->pipe[1]->dentry); +} + +static int gss_pipes_dentries_create(struct rpc_auth *auth) +{ + int err; + struct gss_auth *gss_auth; + struct rpc_clnt *clnt; + + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + clnt = gss_auth->client; + + gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, + "gssd", + clnt, gss_auth->pipe[1]); + if (IS_ERR(gss_auth->pipe[1]->dentry)) + return PTR_ERR(gss_auth->pipe[1]->dentry); + gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, + gss_auth->mech->gm_name, + clnt, gss_auth->pipe[0]); + if (IS_ERR(gss_auth->pipe[0]->dentry)) { + err = PTR_ERR(gss_auth->pipe[0]->dentry); + goto err_unlink_pipe_1; + } + return 0; + +err_unlink_pipe_1: + rpc_unlink(gss_auth->pipe[1]->dentry); + return err; +} + +static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, + struct rpc_auth *auth) +{ + struct net *net = rpc_net_ns(clnt); + struct super_block *sb; + + sb = rpc_get_sb_net(net); + if (sb) { + if (clnt->cl_dentry) + gss_pipes_dentries_destroy(auth); + rpc_put_sb_net(net); + } +} + +static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, + struct rpc_auth *auth) +{ + struct net *net = rpc_net_ns(clnt); + struct super_block *sb; + int err = 0; + + sb = rpc_get_sb_net(net); + if (sb) { + if (clnt->cl_dentry) + err = gss_pipes_dentries_create(auth); + rpc_put_sb_net(net); + } + return err; +} + /* * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) @@ -801,32 +868,33 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, - "gssd", - clnt, &gss_upcall_ops_v1, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry[1])) { - err = PTR_ERR(gss_auth->dentry[1]); + gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->pipe[1])) { + err = PTR_ERR(gss_auth->pipe[1]); goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, - gss_auth->mech->gm_name, - clnt, &gss_upcall_ops_v0, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry[0])) { - err = PTR_ERR(gss_auth->dentry[0]); - goto err_unlink_pipe_1; + gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->pipe[0])) { + err = PTR_ERR(gss_auth->pipe[0]); + goto err_destroy_pipe_1; } + err = gss_pipes_dentries_create_net(clnt, auth); + if (err) + goto err_destroy_pipe_0; err = rpcauth_init_credcache(auth); if (err) - goto err_unlink_pipe_0; + goto err_unlink_pipes; return auth; -err_unlink_pipe_0: - rpc_unlink(gss_auth->dentry[0]); -err_unlink_pipe_1: - rpc_unlink(gss_auth->dentry[1]); +err_unlink_pipes: + gss_pipes_dentries_destroy_net(clnt, auth); +err_destroy_pipe_0: + rpc_destroy_pipe_data(gss_auth->pipe[0]); +err_destroy_pipe_1: + rpc_destroy_pipe_data(gss_auth->pipe[1]); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -839,8 +907,9 @@ out_dec: static void gss_free(struct gss_auth *gss_auth) { - rpc_unlink(gss_auth->dentry[1]); - rpc_unlink(gss_auth->dentry[0]); + gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); + rpc_destroy_pipe_data(gss_auth->pipe[0]); + rpc_destroy_pipe_data(gss_auth->pipe[1]); gss_mech_put(gss_auth->mech); kfree(gss_auth); @@ -1547,7 +1616,9 @@ static const struct rpc_authops authgss_ops = { .create = gss_create, .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, - .crcreate = gss_create_cred + .crcreate = gss_create_cred, + .pipes_create = gss_pipes_dentries_create, + .pipes_destroy = gss_pipes_dentries_destroy, }; static const struct rpc_credops gss_credops = { @@ -1591,6 +1662,21 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .release_pipe = gss_pipe_release, }; +static __net_init int rpcsec_gss_init_net(struct net *net) +{ + return gss_svc_init_net(net); +} + +static __net_exit void rpcsec_gss_exit_net(struct net *net) +{ + gss_svc_shutdown_net(net); +} + +static struct pernet_operations rpcsec_gss_net_ops = { + .init = rpcsec_gss_init_net, + .exit = rpcsec_gss_exit_net, +}; + /* * Initialize RPCSEC_GSS module */ @@ -1604,8 +1690,13 @@ static int __init init_rpcsec_gss(void) err = gss_svc_init(); if (err) goto out_unregister; + err = register_pernet_subsys(&rpcsec_gss_net_ops); + if (err) + goto out_svc_exit; rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version"); return 0; +out_svc_exit: + gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: @@ -1614,6 +1705,7 @@ out: static void __exit exit_rpcsec_gss(void) { + unregister_pernet_subsys(&rpcsec_gss_net_ops); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 9576f35ab70..0f43e894bc0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -600,11 +600,14 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, u32 ret; struct scatterlist sg[1]; struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; - u8 data[crypto_blkcipher_blocksize(cipher) * 2]; + u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; struct page **save_pages; u32 len = buf->len - offset; - BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2); + if (len > ARRAY_SIZE(data)) { + WARN_ON(0); + return -ENOMEM; + } /* * For encryption, we want to read from the cleartext diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 8c67890de42..8eff8c32d1b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -344,7 +344,7 @@ out_err: return PTR_ERR(p); } -struct crypto_blkcipher * +static struct crypto_blkcipher * context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { struct crypto_blkcipher *cp; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index d7941eab779..62ae3273186 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -159,7 +159,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } -u32 +static u32 gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 8d0f7d3c71c..1600cfb1618 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -48,6 +48,8 @@ #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/cache.h> +#include "../netns.h" + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -75,10 +77,8 @@ struct rsi { int major_status, minor_status; }; -static struct cache_head *rsi_table[RSI_HASHMAX]; -static struct cache_detail rsi_cache; -static struct rsi *rsi_update(struct rsi *new, struct rsi *old); -static struct rsi *rsi_lookup(struct rsi *item); +static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old); +static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item); static void rsi_free(struct rsi *rsii) { @@ -216,7 +216,7 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.in_token, buf, len)) goto out; - rsip = rsi_lookup(&rsii); + rsip = rsi_lookup(cd, &rsii); if (!rsip) goto out; @@ -258,21 +258,20 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.out_token, buf, len)) goto out; rsii.h.expiry_time = expiry; - rsip = rsi_update(&rsii, rsip); + rsip = rsi_update(cd, &rsii, rsip); status = 0; out: rsi_free(&rsii); if (rsip) - cache_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, cd); else status = -ENOMEM; return status; } -static struct cache_detail rsi_cache = { +static struct cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, - .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, @@ -283,24 +282,24 @@ static struct cache_detail rsi_cache = { .alloc = rsi_alloc, }; -static struct rsi *rsi_lookup(struct rsi *item) +static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item) { struct cache_head *ch; int hash = rsi_hash(item); - ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash); + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct rsi, h); else return NULL; } -static struct rsi *rsi_update(struct rsi *new, struct rsi *old) +static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old) { struct cache_head *ch; int hash = rsi_hash(new); - ch = sunrpc_cache_update(&rsi_cache, &new->h, + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsi, h); @@ -339,10 +338,8 @@ struct rsc { char *client_name; }; -static struct cache_head *rsc_table[RSC_HASHMAX]; -static struct cache_detail rsc_cache; -static struct rsc *rsc_update(struct rsc *new, struct rsc *old); -static struct rsc *rsc_lookup(struct rsc *item); +static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); +static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item); static void rsc_free(struct rsc *rsci) { @@ -444,7 +441,7 @@ static int rsc_parse(struct cache_detail *cd, if (expiry == 0) goto out; - rscp = rsc_lookup(&rsci); + rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; @@ -506,22 +503,21 @@ static int rsc_parse(struct cache_detail *cd, } rsci.h.expiry_time = expiry; - rscp = rsc_update(&rsci, rscp); + rscp = rsc_update(cd, &rsci, rscp); status = 0; out: gss_mech_put(gm); rsc_free(&rsci); if (rscp) - cache_put(&rscp->h, &rsc_cache); + cache_put(&rscp->h, cd); else status = -ENOMEM; return status; } -static struct cache_detail rsc_cache = { +static struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, - .hash_table = rsc_table, .name = "auth.rpcsec.context", .cache_put = rsc_put, .cache_parse = rsc_parse, @@ -531,24 +527,24 @@ static struct cache_detail rsc_cache = { .alloc = rsc_alloc, }; -static struct rsc *rsc_lookup(struct rsc *item) +static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item) { struct cache_head *ch; int hash = rsc_hash(item); - ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash); + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct rsc, h); else return NULL; } -static struct rsc *rsc_update(struct rsc *new, struct rsc *old) +static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old) { struct cache_head *ch; int hash = rsc_hash(new); - ch = sunrpc_cache_update(&rsc_cache, &new->h, + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsc, h); @@ -558,7 +554,7 @@ static struct rsc *rsc_update(struct rsc *new, struct rsc *old) static struct rsc * -gss_svc_searchbyctx(struct xdr_netobj *handle) +gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; @@ -566,11 +562,11 @@ gss_svc_searchbyctx(struct xdr_netobj *handle) memset(&rsci, 0, sizeof(rsci)); if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) return NULL; - found = rsc_lookup(&rsci); + found = rsc_lookup(cd, &rsci); rsc_free(&rsci); if (!found) return NULL; - if (cache_check(&rsc_cache, &found->h, NULL)) + if (cache_check(cd, &found->h, NULL)) return NULL; return found; } @@ -968,20 +964,20 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) } static inline int -gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) +gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi *rsip) { struct rsc *rsci; int rc; if (rsip->major_status != GSS_S_COMPLETE) return gss_write_null_verf(rqstp); - rsci = gss_svc_searchbyctx(&rsip->out_handle); + rsci = gss_svc_searchbyctx(cd, &rsip->out_handle); if (rsci == NULL) { rsip->major_status = GSS_S_NO_CONTEXT; return gss_write_null_verf(rqstp); } rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); - cache_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, cd); return rc; } @@ -1000,6 +996,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, struct xdr_netobj tmpobj; struct rsi *rsip, rsikey; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); /* Read the verifier; should be NULL: */ *authp = rpc_autherr_badverf; @@ -1028,17 +1025,17 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, } /* Perform upcall, or find upcall result: */ - rsip = rsi_lookup(&rsikey); + rsip = rsi_lookup(sn->rsi_cache, &rsikey); rsi_free(&rsikey); if (!rsip) return SVC_CLOSE; - if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) + if (cache_check(sn->rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ return SVC_CLOSE; ret = SVC_CLOSE; /* Got an answer to the upcall; use it: */ - if (gss_write_init_verf(rqstp, rsip)) + if (gss_write_init_verf(sn->rsc_cache, rqstp, rsip)) goto out; if (resv->iov_len + 4 > PAGE_SIZE) goto out; @@ -1055,7 +1052,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, ret = SVC_COMPLETE; out: - cache_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, sn->rsi_cache); return ret; } @@ -1079,6 +1076,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", argv->iov_len); @@ -1129,7 +1127,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ *authp = rpcsec_gsserr_credproblem; - rsci = gss_svc_searchbyctx(&gc->gc_ctx); + rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); if (!rsci) goto auth_err; switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { @@ -1209,7 +1207,7 @@ drop: ret = SVC_DROP; out: if (rsci) - cache_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, sn->rsc_cache); return ret; } @@ -1362,6 +1360,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; @@ -1404,7 +1403,7 @@ out_err: put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - cache_put(&gsd->rsci->h, &rsc_cache); + cache_put(&gsd->rsci->h, sn->rsc_cache); gsd->rsci = NULL; return stat; @@ -1429,30 +1428,96 @@ static struct auth_ops svcauthops_gss = { .set_client = svcauth_gss_set_client, }; +static int rsi_cache_create_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&rsi_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->rsi_cache = cd; + return 0; +} + +static void rsi_cache_destroy_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->rsi_cache; + + sn->rsi_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + +static int rsc_cache_create_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&rsc_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->rsc_cache = cd; + return 0; +} + +static void rsc_cache_destroy_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->rsc_cache; + + sn->rsc_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + int -gss_svc_init(void) +gss_svc_init_net(struct net *net) { - int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); + int rv; + + rv = rsc_cache_create_net(net); if (rv) return rv; - rv = cache_register(&rsc_cache); + rv = rsi_cache_create_net(net); if (rv) goto out1; - rv = cache_register(&rsi_cache); - if (rv) - goto out2; return 0; -out2: - cache_unregister(&rsc_cache); out1: - svc_auth_unregister(RPC_AUTH_GSS); + rsc_cache_destroy_net(net); return rv; } void +gss_svc_shutdown_net(struct net *net) +{ + rsi_cache_destroy_net(net); + rsc_cache_destroy_net(net); +} + +int +gss_svc_init(void) +{ + return svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); +} + +void gss_svc_shutdown(void) { - cache_unregister(&rsc_cache); - cache_unregister(&rsi_cache); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 3ad435a14ad..31def68a0f6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -25,6 +25,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <linux/slab.h> #include <linux/sunrpc/xprt.h> #include <linux/export.h> +#include <linux/sunrpc/bc_xprt.h> #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_TRANS diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 465df9ae104..f21ece08876 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -344,7 +344,7 @@ static int current_index; static void do_cache_clean(struct work_struct *work); static struct delayed_work cache_cleaner; -static void sunrpc_init_cache_detail(struct cache_detail *cd) +void sunrpc_init_cache_detail(struct cache_detail *cd) { rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); @@ -360,8 +360,9 @@ static void sunrpc_init_cache_detail(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); } +EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); -static void sunrpc_destroy_cache_detail(struct cache_detail *cd) +void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -384,6 +385,7 @@ static void sunrpc_destroy_cache_detail(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } +EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); /* clean cache tries to find something to clean * and cleans it. @@ -1643,12 +1645,6 @@ int cache_register_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_register_net); -int cache_register(struct cache_detail *cd) -{ - return cache_register_net(cd, &init_net); -} -EXPORT_SYMBOL_GPL(cache_register); - void cache_unregister_net(struct cache_detail *cd, struct net *net) { remove_cache_proc_entries(cd, net); @@ -1656,11 +1652,31 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -void cache_unregister(struct cache_detail *cd) +struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) +{ + struct cache_detail *cd; + + cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + return ERR_PTR(-ENOMEM); + + cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), + GFP_KERNEL); + if (cd->hash_table == NULL) { + kfree(cd); + return ERR_PTR(-ENOMEM); + } + cd->net = net; + return cd; +} +EXPORT_SYMBOL_GPL(cache_create_net); + +void cache_destroy_net(struct cache_detail *cd, struct net *net) { - cache_unregister_net(cd, &init_net); + kfree(cd->hash_table); + kfree(cd); } -EXPORT_SYMBOL_GPL(cache_unregister); +EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) @@ -1787,17 +1803,14 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, struct dentry *dir; int ret = 0; - sunrpc_init_cache_detail(cd); q.name = name; q.len = strlen(name); q.hash = full_name_hash(q.name, q.len); dir = rpc_create_cache_dir(parent, &q, umode, cd); if (!IS_ERR(dir)) cd->u.pipefs.dir = dir; - else { - sunrpc_destroy_cache_detail(cd); + else ret = PTR_ERR(dir); - } return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); @@ -1806,7 +1819,6 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { rpc_remove_cache_dir(cd->u.pipefs.dir); cd->u.pipefs.dir = NULL; - sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d3daab6f2c0..67972462a54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -30,13 +30,16 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/un.h> +#include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> +#include <trace/events/sunrpc.h> #include "sunrpc.h" +#include "netns.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -49,8 +52,6 @@ /* * All RPC clients are linked into this list */ -static LIST_HEAD(all_clients); -static DEFINE_SPINLOCK(rpc_client_lock); static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -80,82 +81,191 @@ static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { - spin_lock(&rpc_client_lock); - list_add(&clnt->cl_clients, &all_clients); - spin_unlock(&rpc_client_lock); + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + spin_lock(&sn->rpc_client_lock); + list_add(&clnt->cl_clients, &sn->all_clients); + spin_unlock(&sn->rpc_client_lock); } static void rpc_unregister_client(struct rpc_clnt *clnt) { - spin_lock(&rpc_client_lock); + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + spin_lock(&sn->rpc_client_lock); list_del(&clnt->cl_clients); - spin_unlock(&rpc_client_lock); + spin_unlock(&sn->rpc_client_lock); } -static int -rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) +static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) +{ + if (clnt->cl_dentry) { + if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) + clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); + rpc_remove_client_dir(clnt->cl_dentry); + } + clnt->cl_dentry = NULL; +} + +static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) +{ + struct net *net = rpc_net_ns(clnt); + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + __rpc_clnt_remove_pipedir(clnt); + rpc_put_sb_net(net); + } +} + +static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, + struct rpc_clnt *clnt, + const char *dir_name) { static uint32_t clntid; - struct path path, dir; char name[15]; struct qstr q = { .name = name, }; + struct dentry *dir, *dentry; int error; - clnt->cl_path.mnt = ERR_PTR(-ENOENT); - clnt->cl_path.dentry = ERR_PTR(-ENOENT); - if (dir_name == NULL) - return 0; - - path.mnt = rpc_get_mount(); - if (IS_ERR(path.mnt)) - return PTR_ERR(path.mnt); - error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir); - if (error) - goto err; - + dir = rpc_d_lookup_sb(sb, dir_name); + if (dir == NULL) + return dir; for (;;) { q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; q.hash = full_name_hash(q.name, q.len); - path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt); - if (!IS_ERR(path.dentry)) + dentry = rpc_create_client_dir(dir, &q, clnt); + if (!IS_ERR(dentry)) break; - error = PTR_ERR(path.dentry); + error = PTR_ERR(dentry); if (error != -EEXIST) { printk(KERN_INFO "RPC: Couldn't create pipefs entry" " %s/%s, error %d\n", dir_name, name, error); - goto err_path_put; + break; } } - path_put(&dir); - clnt->cl_path = path; + dput(dir); + return dentry; +} + +static int +rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) +{ + struct net *net = rpc_net_ns(clnt); + struct super_block *pipefs_sb; + struct dentry *dentry; + + clnt->cl_dentry = NULL; + if (dir_name == NULL) + return 0; + pipefs_sb = rpc_get_sb_net(net); + if (!pipefs_sb) + return 0; + dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); + rpc_put_sb_net(net); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + clnt->cl_dentry = dentry; return 0; -err_path_put: - path_put(&dir); -err: - rpc_put_mount(); +} + +static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, + struct super_block *sb) +{ + struct dentry *dentry; + int err = 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + if (clnt->cl_program->pipe_dir_name == NULL) + break; + dentry = rpc_setup_pipedir_sb(sb, clnt, + clnt->cl_program->pipe_dir_name); + BUG_ON(dentry == NULL); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + clnt->cl_dentry = dentry; + if (clnt->cl_auth->au_ops->pipes_create) { + err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth); + if (err) + __rpc_clnt_remove_pipedir(clnt); + } + break; + case RPC_PIPEFS_UMOUNT: + __rpc_clnt_remove_pipedir(clnt); + break; + default: + printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); + return -ENOTSUPP; + } + return err; +} + +static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_clnt *clnt; + + spin_lock(&sn->rpc_client_lock); + list_for_each_entry(clnt, &sn->all_clients, cl_clients) { + if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || + ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) + continue; + atomic_inc(&clnt->cl_count); + spin_unlock(&sn->rpc_client_lock); + return clnt; + } + spin_unlock(&sn->rpc_client_lock); + return NULL; +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct rpc_clnt *clnt; + int error = 0; + + while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { + error = __rpc_pipefs_event(clnt, event, sb); + rpc_release_client(clnt); + if (error) + break; + } return error; } +static struct notifier_block rpc_clients_block = { + .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_RPC_PRIO, +}; + +int rpc_clients_notifier_register(void) +{ + return rpc_pipefs_notifier_register(&rpc_clients_block); +} + +void rpc_clients_notifier_unregister(void) +{ + return rpc_pipefs_notifier_unregister(&rpc_clients_block); +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { - struct rpc_program *program = args->program; - struct rpc_version *version; + const struct rpc_program *program = args->program; + const struct rpc_version *version; struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - size_t len; /* sanity check the name before trying to print it */ - err = -EINVAL; - len = strlen(args->servername); - if (len > RPC_MAXNETNAMELEN) - goto out_no_rpciod; - len++; - dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, args->servername, xprt); @@ -178,17 +288,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_err; clnt->cl_parent = clnt; - clnt->cl_server = clnt->cl_inline_name; - if (len > sizeof(clnt->cl_inline_name)) { - char *buf = kmalloc(len, GFP_KERNEL); - if (buf != NULL) - clnt->cl_server = buf; - else - len = sizeof(clnt->cl_inline_name); - } - strlcpy(clnt->cl_server, args->servername, len); - - clnt->cl_xprt = xprt; + rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; @@ -203,7 +303,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru INIT_LIST_HEAD(&clnt->cl_tasks); spin_lock_init(&clnt->cl_lock); - if (!xprt_bound(clnt->cl_xprt)) + if (!xprt_bound(xprt)) clnt->cl_autobind = 1; clnt->cl_timeout = xprt->timeout; @@ -245,17 +345,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); - rpc_put_mount(); - } + rpc_clnt_remove_pipedir(clnt); out_no_path: kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); out_no_stats: - if (clnt->cl_server != clnt->cl_inline_name) - kfree(clnt->cl_server); kfree(clnt); out_err: xprt_put(xprt); @@ -285,6 +380,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, + .servername = args->servername, .bc_xprt = args->bc_xprt, }; char servername[48]; @@ -293,7 +389,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. */ - if (args->servername == NULL) { + if (xprtargs.servername == NULL) { struct sockaddr_un *sun = (struct sockaddr_un *)args->address; struct sockaddr_in *sin = @@ -320,7 +416,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * address family isn't recognized. */ return ERR_PTR(-EINVAL); } - args->servername = servername; + xprtargs.servername = servername; } xprt = xprt_create_transport(&xprtargs); @@ -373,6 +469,7 @@ struct rpc_clnt * rpc_clone_client(struct rpc_clnt *clnt) { struct rpc_clnt *new; + struct rpc_xprt *xprt; int err = -ENOMEM; new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); @@ -392,18 +489,25 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (xprt == NULL) + goto out_no_transport; + rcu_assign_pointer(new->cl_xprt, xprt); atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - xprt_get(clnt->cl_xprt); atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; out_no_path: + xprt_put(xprt); +out_no_transport: kfree(new->cl_principal); out_no_principal: rpc_free_iostats(new->cl_metrics); @@ -452,8 +556,9 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks); */ void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s\n", - clnt->cl_protname, clnt->cl_server); + dprintk_rcu("RPC: shutting down %s client for %s\n", + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); @@ -471,24 +576,17 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); static void rpc_free_client(struct rpc_clnt *clnt) { - dprintk("RPC: destroying %s client for %s\n", - clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); - rpc_put_mount(); - } - if (clnt->cl_parent != clnt) { + dprintk_rcu("RPC: destroying %s client for %s\n", + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - goto out_free; - } - if (clnt->cl_server != clnt->cl_inline_name) - kfree(clnt->cl_server); -out_free: rpc_unregister_client(clnt); + rpc_clnt_remove_pipedir(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; - xprt_put(clnt->cl_xprt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); rpciod_down(); kfree(clnt); } @@ -541,11 +639,11 @@ rpc_release_client(struct rpc_clnt *clnt) * The Sun NFSv2/v3 ACL protocol can do this. */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, - struct rpc_program *program, + const struct rpc_program *program, u32 vers) { struct rpc_clnt *clnt; - struct rpc_version *version; + const struct rpc_version *version; int err; BUG_ON(vers >= program->nrvers || !program->version[vers]); @@ -777,13 +875,18 @@ EXPORT_SYMBOL_GPL(rpc_call_start); size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) { size_t bytes; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; - bytes = sizeof(xprt->addr); + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + + bytes = xprt->addrlen; if (bytes > bufsize) bytes = bufsize; - memcpy(buf, &clnt->cl_xprt->addr, bytes); - return xprt->addrlen; + memcpy(buf, &xprt->addr, bytes); + rcu_read_unlock(); + + return bytes; } EXPORT_SYMBOL_GPL(rpc_peeraddr); @@ -792,11 +895,16 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); * @clnt: RPC client structure * @format: address format * + * NB: the lifetime of the memory referenced by the returned pointer is + * the same as the rpc_xprt itself. As long as the caller uses this + * pointer, it must hold the RCU read lock. */ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) { - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; + + xprt = rcu_dereference(clnt->cl_xprt); if (xprt->address_strings[format] != NULL) return xprt->address_strings[format]; @@ -805,17 +913,203 @@ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(rpc_peeraddr2str); +static const struct sockaddr_in rpc_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), +}; + +static const struct sockaddr_in6 rpc_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, +}; + +/* + * Try a getsockname() on a connected datagram socket. Using a + * connected datagram socket prevents leaving a socket in TIME_WAIT. + * This conserves the ephemeral port number space. + * + * Returns zero and fills in "buf" if successful; otherwise, a + * negative errno is returned. + */ +static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, + struct sockaddr *buf, int buflen) +{ + struct socket *sock; + int err; + + err = __sock_create(net, sap->sa_family, + SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create UDP socket (%d)\n", err); + goto out; + } + + switch (sap->sa_family) { + case AF_INET: + err = kernel_bind(sock, + (struct sockaddr *)&rpc_inaddr_loopback, + sizeof(rpc_inaddr_loopback)); + break; + case AF_INET6: + err = kernel_bind(sock, + (struct sockaddr *)&rpc_in6addr_loopback, + sizeof(rpc_in6addr_loopback)); + break; + default: + err = -EAFNOSUPPORT; + goto out; + } + if (err < 0) { + dprintk("RPC: can't bind UDP socket (%d)\n", err); + goto out_release; + } + + err = kernel_connect(sock, sap, salen, 0); + if (err < 0) { + dprintk("RPC: can't connect UDP socket (%d)\n", err); + goto out_release; + } + + err = kernel_getsockname(sock, buf, &buflen); + if (err < 0) { + dprintk("RPC: getsockname failed (%d)\n", err); + goto out_release; + } + + err = 0; + if (buf->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf; + sin6->sin6_scope_id = 0; + } + dprintk("RPC: %s succeeded\n", __func__); + +out_release: + sock_release(sock); +out: + return err; +} + +/* + * Scraping a connected socket failed, so we don't have a useable + * local address. Fallback: generate an address that will prevent + * the server from calling us back. + * + * Returns zero and fills in "buf" if successful; otherwise, a + * negative errno is returned. + */ +static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen) +{ + switch (family) { + case AF_INET: + if (buflen < sizeof(rpc_inaddr_loopback)) + return -EINVAL; + memcpy(buf, &rpc_inaddr_loopback, + sizeof(rpc_inaddr_loopback)); + break; + case AF_INET6: + if (buflen < sizeof(rpc_in6addr_loopback)) + return -EINVAL; + memcpy(buf, &rpc_in6addr_loopback, + sizeof(rpc_in6addr_loopback)); + default: + dprintk("RPC: %s: address family not supported\n", + __func__); + return -EAFNOSUPPORT; + } + dprintk("RPC: %s: succeeded\n", __func__); + return 0; +} + +/** + * rpc_localaddr - discover local endpoint address for an RPC client + * @clnt: RPC client structure + * @buf: target buffer + * @buflen: size of target buffer, in bytes + * + * Returns zero and fills in "buf" and "buflen" if successful; + * otherwise, a negative errno is returned. + * + * This works even if the underlying transport is not currently connected, + * or if the upper layer never previously provided a source address. + * + * The result of this function call is transient: multiple calls in + * succession may give different results, depending on how local + * networking configuration changes over time. + */ +int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_xprt *xprt; + struct net *net; + size_t salen; + int err; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + salen = xprt->addrlen; + memcpy(sap, &xprt->addr, salen); + net = get_net(xprt->xprt_net); + rcu_read_unlock(); + + rpc_set_port(sap, 0); + err = rpc_sockname(net, sap, salen, buf, buflen); + put_net(net); + if (err != 0) + /* Couldn't discover local address, return ANYADDR */ + return rpc_anyaddr(sap->sa_family, buf, buflen); + return 0; +} +EXPORT_SYMBOL_GPL(rpc_localaddr); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); if (xprt->ops->set_buffer_size) xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_setbufsize); -/* - * Return size of largest payload RPC client can support, in bytes +/** + * rpc_protocol - Get transport protocol number for an RPC client + * @clnt: RPC client to query + * + */ +int rpc_protocol(struct rpc_clnt *clnt) +{ + int protocol; + + rcu_read_lock(); + protocol = rcu_dereference(clnt->cl_xprt)->prot; + rcu_read_unlock(); + return protocol; +} +EXPORT_SYMBOL_GPL(rpc_protocol); + +/** + * rpc_net_ns - Get the network namespace for this RPC client + * @clnt: RPC client to query + * + */ +struct net *rpc_net_ns(struct rpc_clnt *clnt) +{ + struct net *ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->xprt_net; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_net_ns); + +/** + * rpc_max_payload - Get maximum payload size for a transport, in bytes + * @clnt: RPC client to query * * For stream transports, this is one RPC record fragment (see RFC * 1831), as we don't support multi-record requests yet. For datagram @@ -824,7 +1118,12 @@ EXPORT_SYMBOL_GPL(rpc_setbufsize); */ size_t rpc_max_payload(struct rpc_clnt *clnt) { - return clnt->cl_xprt->max_payload; + size_t ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->max_payload; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL_GPL(rpc_max_payload); @@ -835,8 +1134,11 @@ EXPORT_SYMBOL_GPL(rpc_max_payload); */ void rpc_force_rebind(struct rpc_clnt *clnt) { - if (clnt->cl_autobind) - xprt_clear_bound(clnt->cl_xprt); + if (clnt->cl_autobind) { + rcu_read_lock(); + xprt_clear_bound(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + } } EXPORT_SYMBOL_GPL(rpc_force_rebind); @@ -1162,6 +1464,7 @@ call_bind_status(struct rpc_task *task) return; } + trace_rpc_bind_status(task); switch (task->tk_status) { case -ENOMEM: dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); @@ -1261,6 +1564,7 @@ call_connect_status(struct rpc_task *task) return; } + trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: @@ -1449,6 +1753,7 @@ call_status(struct rpc_task *task) return; } + trace_rpc_call_status(task); task->tk_status = 0; switch(status) { case -EHOSTDOWN: @@ -1512,8 +1817,11 @@ call_timeout(struct rpc_task *task) } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); else @@ -1523,9 +1831,13 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - if (clnt->cl_chatty) + if (clnt->cl_chatty) { + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); + } } rpc_force_rebind(clnt); /* @@ -1554,9 +1866,13 @@ call_decode(struct rpc_task *task) dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - if (clnt->cl_chatty) + if (clnt->cl_chatty) { + rcu_read_lock(); printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + clnt->cl_protname, + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); + } task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1634,6 +1950,7 @@ rpc_encode_header(struct rpc_task *task) static __be32 * rpc_verify_header(struct rpc_task *task) { + struct rpc_clnt *clnt = task->tk_client; struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; __be32 *p = iov->iov_base; @@ -1706,8 +2023,11 @@ rpc_verify_header(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: + rcu_read_lock(); printk(KERN_NOTICE "RPC: server %s requires stronger " - "authentication.\n", task->tk_client->cl_server); + "authentication.\n", + rcu_dereference(clnt->cl_xprt)->servername); + rcu_read_unlock(); break; default: dprintk("RPC: %5u %s: unknown auth error: %x\n", @@ -1730,28 +2050,27 @@ rpc_verify_header(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - dprintk("RPC: %5u %s: program %u is unsupported by server %s\n", - task->tk_pid, __func__, - (unsigned int)task->tk_client->cl_prog, - task->tk_client->cl_server); + dprintk_rcu("RPC: %5u %s: program %u is unsupported " + "by server %s\n", task->tk_pid, __func__, + (unsigned int)clnt->cl_prog, + rcu_dereference(clnt->cl_xprt)->servername); error = -EPFNOSUPPORT; goto out_err; case RPC_PROG_MISMATCH: - dprintk("RPC: %5u %s: program %u, version %u unsupported by " - "server %s\n", task->tk_pid, __func__, - (unsigned int)task->tk_client->cl_prog, - (unsigned int)task->tk_client->cl_vers, - task->tk_client->cl_server); + dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " + "by server %s\n", task->tk_pid, __func__, + (unsigned int)clnt->cl_prog, + (unsigned int)clnt->cl_vers, + rcu_dereference(clnt->cl_xprt)->servername); error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %s unsupported by program %u, " + dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, rpc_proc_name(task), - task->tk_client->cl_prog, - task->tk_client->cl_vers, - task->tk_client->cl_server); + clnt->cl_prog, clnt->cl_vers, + rcu_dereference(clnt->cl_xprt)->servername); error = -EOPNOTSUPP; goto out_err; case RPC_GARBAGE_ARGS: @@ -1765,7 +2084,7 @@ rpc_verify_header(struct rpc_task *task) } out_garbage: - task->tk_client->cl_stats->rpcgarbage++; + clnt->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC: %5u %s: retrying\n", @@ -1851,14 +2170,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt, task->tk_action, rpc_waitq); } -void rpc_show_tasks(void) +void rpc_show_tasks(struct net *net) { struct rpc_clnt *clnt; struct rpc_task *task; int header = 0; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - spin_lock(&rpc_client_lock); - list_for_each_entry(clnt, &all_clients, cl_clients) { + spin_lock(&sn->rpc_client_lock); + list_for_each_entry(clnt, &sn->all_clients, cl_clients) { spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { @@ -1869,6 +2189,6 @@ void rpc_show_tasks(void) } spin_unlock(&clnt->cl_lock); } - spin_unlock(&rpc_client_lock); + spin_unlock(&sn->rpc_client_lock); } #endif diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index d013bf211ca..ce7bd449173 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -9,6 +9,20 @@ struct cache_detail; struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; + struct cache_detail *unix_gid_cache; + struct cache_detail *rsc_cache; + struct cache_detail *rsi_cache; + + struct super_block *pipefs_sb; + struct mutex pipefs_sb_lock; + + struct list_head all_clients; + spinlock_t rpc_client_lock; + + struct rpc_clnt *rpcb_local_clnt; + struct rpc_clnt *rpcb_local_clnt4; + spinlock_t rpcb_clnt_lock; + unsigned int rpcb_users; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7d6dd6efbdb..c84c0e0c41c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -16,9 +16,9 @@ #include <linux/namei.h> #include <linux/fsnotify.h> #include <linux/kernel.h> +#include <linux/rcupdate.h> #include <asm/ioctls.h> -#include <linux/fs.h> #include <linux/poll.h> #include <linux/wait.h> #include <linux/seq_file.h> @@ -27,9 +27,15 @@ #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/cache.h> +#include <linux/nsproxy.h> +#include <linux/notifier.h> -static struct vfsmount *rpc_mnt __read_mostly; -static int rpc_mount_count; +#include "netns.h" +#include "sunrpc.h" + +#define RPCDBG_FACILITY RPCDBG_DEBUG + +#define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") static struct file_system_type rpc_pipe_fs_type; @@ -38,7 +44,21 @@ static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) -static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, +static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); + +int rpc_pipefs_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); + +void rpc_pipefs_notifier_unregister(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); + +static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; @@ -51,30 +71,31 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(&rpci->waitq); + wake_up(waitq); } static void rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); - struct rpc_inode *rpci = - container_of(work, struct rpc_inode, queue_timeout.work); - struct inode *inode = &rpci->vfs_inode; + struct rpc_pipe *pipe = + container_of(work, struct rpc_pipe, queue_timeout.work); void (*destroy_msg)(struct rpc_pipe_msg *); + struct dentry *dentry; - spin_lock(&inode->i_lock); - if (rpci->ops == NULL) { - spin_unlock(&inode->i_lock); - return; + spin_lock(&pipe->lock); + destroy_msg = pipe->ops->destroy_msg; + if (pipe->nreaders == 0) { + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; } - destroy_msg = rpci->ops->destroy_msg; - if (rpci->nreaders == 0) { - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; + dentry = dget(pipe->dentry); + spin_unlock(&pipe->lock); + if (dentry) { + rpc_purge_list(&RPC_I(dentry->d_inode)->waitq, + &free_list, destroy_msg, -ETIMEDOUT); + dput(dentry); } - spin_unlock(&inode->i_lock); - rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, @@ -108,30 +129,31 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); * initialize the fields of @msg (other than @msg->list) appropriately. */ int -rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) +rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) { - struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; + struct dentry *dentry; - spin_lock(&inode->i_lock); - if (rpci->ops == NULL) - goto out; - if (rpci->nreaders) { - list_add_tail(&msg->list, &rpci->pipe); - rpci->pipelen += msg->len; + spin_lock(&pipe->lock); + if (pipe->nreaders) { + list_add_tail(&msg->list, &pipe->pipe); + pipe->pipelen += msg->len; res = 0; - } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { - if (list_empty(&rpci->pipe)) + } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&pipe->pipe)) queue_delayed_work(rpciod_workqueue, - &rpci->queue_timeout, + &pipe->queue_timeout, RPC_UPCALL_TIMEOUT); - list_add_tail(&msg->list, &rpci->pipe); - rpci->pipelen += msg->len; + list_add_tail(&msg->list, &pipe->pipe); + pipe->pipelen += msg->len; res = 0; } -out: - spin_unlock(&inode->i_lock); - wake_up(&rpci->waitq); + dentry = dget(pipe->dentry); + spin_unlock(&pipe->lock); + if (dentry) { + wake_up(&RPC_I(dentry->d_inode)->waitq); + dput(dentry); + } return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); @@ -145,29 +167,26 @@ rpc_inode_setowner(struct inode *inode, void *private) static void rpc_close_pipes(struct inode *inode) { - struct rpc_inode *rpci = RPC_I(inode); - const struct rpc_pipe_ops *ops; + struct rpc_pipe *pipe = RPC_I(inode)->pipe; int need_release; + LIST_HEAD(free_list); mutex_lock(&inode->i_mutex); - ops = rpci->ops; - if (ops != NULL) { - LIST_HEAD(free_list); - spin_lock(&inode->i_lock); - need_release = rpci->nreaders != 0 || rpci->nwriters != 0; - rpci->nreaders = 0; - list_splice_init(&rpci->in_upcall, &free_list); - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; - rpci->ops = NULL; - spin_unlock(&inode->i_lock); - rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE); - rpci->nwriters = 0; - if (need_release && ops->release_pipe) - ops->release_pipe(inode); - cancel_delayed_work_sync(&rpci->queue_timeout); - } + spin_lock(&pipe->lock); + need_release = pipe->nreaders != 0 || pipe->nwriters != 0; + pipe->nreaders = 0; + list_splice_init(&pipe->in_upcall, &free_list); + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; + pipe->dentry = NULL; + spin_unlock(&pipe->lock); + rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE); + pipe->nwriters = 0; + if (need_release && pipe->ops->release_pipe) + pipe->ops->release_pipe(inode); + cancel_delayed_work_sync(&pipe->queue_timeout); rpc_inode_setowner(inode, NULL); + RPC_I(inode)->pipe = NULL; mutex_unlock(&inode->i_mutex); } @@ -197,23 +216,24 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe; int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) goto out; - first_open = rpci->nreaders == 0 && rpci->nwriters == 0; - if (first_open && rpci->ops->open_pipe) { - res = rpci->ops->open_pipe(inode); + first_open = pipe->nreaders == 0 && pipe->nwriters == 0; + if (first_open && pipe->ops->open_pipe) { + res = pipe->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) - rpci->nreaders++; + pipe->nreaders++; if (filp->f_mode & FMODE_WRITE) - rpci->nwriters++; + pipe->nwriters++; res = 0; out: mutex_unlock(&inode->i_mutex); @@ -223,38 +243,39 @@ out: static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int last_close; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) goto out; msg = filp->private_data; if (msg != NULL) { - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); msg->errno = -EAGAIN; list_del_init(&msg->list); - spin_unlock(&inode->i_lock); - rpci->ops->destroy_msg(msg); + spin_unlock(&pipe->lock); + pipe->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) - rpci->nwriters --; + pipe->nwriters --; if (filp->f_mode & FMODE_READ) { - rpci->nreaders --; - if (rpci->nreaders == 0) { + pipe->nreaders --; + if (pipe->nreaders == 0) { LIST_HEAD(free_list); - spin_lock(&inode->i_lock); - list_splice_init(&rpci->pipe, &free_list); - rpci->pipelen = 0; - spin_unlock(&inode->i_lock); - rpc_purge_list(rpci, &free_list, - rpci->ops->destroy_msg, -EAGAIN); + spin_lock(&pipe->lock); + list_splice_init(&pipe->pipe, &free_list); + pipe->pipelen = 0; + spin_unlock(&pipe->lock); + rpc_purge_list(&RPC_I(inode)->waitq, &free_list, + pipe->ops->destroy_msg, -EAGAIN); } } - last_close = rpci->nwriters == 0 && rpci->nreaders == 0; - if (last_close && rpci->ops->release_pipe) - rpci->ops->release_pipe(inode); + last_close = pipe->nwriters == 0 && pipe->nreaders == 0; + if (last_close && pipe->ops->release_pipe) + pipe->ops->release_pipe(inode); out: mutex_unlock(&inode->i_mutex); return 0; @@ -264,39 +285,40 @@ static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; mutex_lock(&inode->i_mutex); - if (rpci->ops == NULL) { + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { - spin_lock(&inode->i_lock); - if (!list_empty(&rpci->pipe)) { - msg = list_entry(rpci->pipe.next, + spin_lock(&pipe->lock); + if (!list_empty(&pipe->pipe)) { + msg = list_entry(pipe->pipe.next, struct rpc_pipe_msg, list); - list_move(&msg->list, &rpci->in_upcall); - rpci->pipelen -= msg->len; + list_move(&msg->list, &pipe->in_upcall); + pipe->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; } - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ - res = rpci->ops->upcall(filp, msg, buf, len); + res = pipe->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; - spin_lock(&inode->i_lock); + spin_lock(&pipe->lock); list_del_init(&msg->list); - spin_unlock(&inode->i_lock); - rpci->ops->destroy_msg(msg); + spin_unlock(&pipe->lock); + pipe->ops->destroy_msg(msg); } out_unlock: mutex_unlock(&inode->i_mutex); @@ -307,13 +329,12 @@ static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); int res; mutex_lock(&inode->i_mutex); res = -EPIPE; - if (rpci->ops != NULL) - res = rpci->ops->downcall(filp, buf, len); + if (RPC_I(inode)->pipe != NULL) + res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; } @@ -321,17 +342,18 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct rpc_inode *rpci; - unsigned int mask = 0; + struct inode *inode = filp->f_path.dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + unsigned int mask = POLLOUT | POLLWRNORM; - rpci = RPC_I(filp->f_path.dentry->d_inode); poll_wait(filp, &rpci->waitq, wait); - mask = POLLOUT | POLLWRNORM; - if (rpci->ops == NULL) + mutex_lock(&inode->i_mutex); + if (rpci->pipe == NULL) mask |= POLLERR | POLLHUP; - if (filp->private_data || !list_empty(&rpci->pipe)) + else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) mask |= POLLIN | POLLRDNORM; + mutex_unlock(&inode->i_mutex); return mask; } @@ -339,23 +361,26 @@ static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe *pipe; int len; switch (cmd) { case FIONREAD: - spin_lock(&inode->i_lock); - if (rpci->ops == NULL) { - spin_unlock(&inode->i_lock); + mutex_lock(&inode->i_mutex); + pipe = RPC_I(inode)->pipe; + if (pipe == NULL) { + mutex_unlock(&inode->i_mutex); return -EPIPE; } - len = rpci->pipelen; + spin_lock(&pipe->lock); + len = pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } - spin_unlock(&inode->i_lock); + spin_unlock(&pipe->lock); + mutex_unlock(&inode->i_mutex); return put_user(len, (int __user *)arg); default: return -EINVAL; @@ -378,12 +403,15 @@ rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; - seq_printf(m, "RPC server: %s\n", clnt->cl_server); + rcu_read_lock(); + seq_printf(m, "RPC server: %s\n", + rcu_dereference(clnt->cl_xprt)->servername); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); + rcu_read_unlock(); return 0; } @@ -440,23 +468,6 @@ struct rpc_filelist { umode_t mode; }; -struct vfsmount *rpc_get_mount(void) -{ - int err; - - err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count); - if (err != 0) - return ERR_PTR(err); - return rpc_mnt; -} -EXPORT_SYMBOL_GPL(rpc_get_mount); - -void rpc_put_mount(void) -{ - simple_release_fs(&rpc_mnt, &rpc_mount_count); -} -EXPORT_SYMBOL_GPL(rpc_put_mount); - static int rpc_delete_dentry(const struct dentry *dentry) { return 1; @@ -540,12 +551,47 @@ static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, return 0; } -static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private, - const struct rpc_pipe_ops *ops, - int flags) +static void +init_pipe(struct rpc_pipe *pipe) +{ + pipe->nreaders = 0; + pipe->nwriters = 0; + INIT_LIST_HEAD(&pipe->in_upcall); + INIT_LIST_HEAD(&pipe->in_downcall); + INIT_LIST_HEAD(&pipe->pipe); + pipe->pipelen = 0; + INIT_DELAYED_WORK(&pipe->queue_timeout, + rpc_timeout_upcall_queue); + pipe->ops = NULL; + spin_lock_init(&pipe->lock); + pipe->dentry = NULL; +} + +void rpc_destroy_pipe_data(struct rpc_pipe *pipe) +{ + kfree(pipe); +} +EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data); + +struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) +{ + struct rpc_pipe *pipe; + + pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); + if (!pipe) + return ERR_PTR(-ENOMEM); + init_pipe(pipe); + pipe->ops = ops; + pipe->flags = flags; + return pipe; +} +EXPORT_SYMBOL_GPL(rpc_mkpipe_data); + +static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + struct rpc_pipe *pipe) { struct rpc_inode *rpci; int err; @@ -554,10 +600,8 @@ static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, if (err) return err; rpci = RPC_I(dentry->d_inode); - rpci->nkern_readwriters = 1; rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; + rpci->pipe = pipe; fsnotify_create(dir, dentry); return 0; } @@ -573,6 +617,22 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) return ret; } +int rpc_rmdir(struct dentry *dentry) +{ + struct dentry *parent; + struct inode *dir; + int error; + + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + error = __rpc_rmdir(dir, dentry); + mutex_unlock(&dir->i_mutex); + dput(parent); + return error; +} +EXPORT_SYMBOL_GPL(rpc_rmdir); + static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; @@ -587,16 +647,12 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); - rpci->nkern_readwriters--; - if (rpci->nkern_readwriters != 0) - return 0; rpc_close_pipes(inode); return __rpc_unlink(dir, dentry); } -static struct dentry *__rpc_lookup_create(struct dentry *parent, +static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, struct qstr *name) { struct dentry *dentry; @@ -604,27 +660,13 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, dentry = d_lookup(parent, name); if (!dentry) { dentry = d_alloc(parent, name); - if (!dentry) { - dentry = ERR_PTR(-ENOMEM); - goto out_err; - } + if (!dentry) + return ERR_PTR(-ENOMEM); } - if (!dentry->d_inode) + if (dentry->d_inode == NULL) { d_set_d_op(dentry, &rpc_dentry_operations); -out_err: - return dentry; -} - -static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, - struct qstr *name) -{ - struct dentry *dentry; - - dentry = __rpc_lookup_create(parent, name); - if (IS_ERR(dentry)) - return dentry; - if (dentry->d_inode == NULL) return dentry; + } dput(dentry); return ERR_PTR(-EEXIST); } @@ -779,7 +821,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * @private: private data to associate with the pipe, for the caller's use * @ops: operations defining the behavior of the pipe: upcall, downcall, * release_pipe, open_pipe, and destroy_msg. - * @flags: rpc_inode flags + * @flags: rpc_pipe flags * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). The actual reads will result in calls to @@ -792,9 +834,8 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, - void *private, const struct rpc_pipe_ops *ops, - int flags) +struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, + void *private, struct rpc_pipe *pipe) { struct dentry *dentry; struct inode *dir = parent->d_inode; @@ -802,9 +843,9 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, struct qstr q; int err; - if (ops->upcall == NULL) + if (pipe->ops->upcall == NULL) umode &= ~S_IRUGO; - if (ops->downcall == NULL) + if (pipe->ops->downcall == NULL) umode &= ~S_IWUGO; q.name = name; @@ -812,24 +853,11 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, q.hash = full_name_hash(q.name, q.len), mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, &q); if (IS_ERR(dentry)) goto out; - if (dentry->d_inode) { - struct rpc_inode *rpci = RPC_I(dentry->d_inode); - if (rpci->private != private || - rpci->ops != ops || - rpci->flags != flags) { - dput (dentry); - err = -EBUSY; - goto out_err; - } - rpci->nkern_readwriters++; - goto out; - } - - err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, - private, ops, flags); + err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, + private, pipe); if (err) goto out_err; out: @@ -842,7 +870,7 @@ out_err: err); goto out; } -EXPORT_SYMBOL_GPL(rpc_mkpipe); +EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); /** * rpc_unlink - remove a pipe @@ -915,7 +943,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @dentry: directory to remove + * @clnt: rpc client */ int rpc_remove_client_dir(struct dentry *dentry) { @@ -1020,11 +1048,64 @@ static const struct rpc_filelist files[] = { }, }; +/* + * This call can be used only in RPC pipefs mount notification hooks. + */ +struct dentry *rpc_d_lookup_sb(const struct super_block *sb, + const unsigned char *dir_name) +{ + struct qstr dir = { + .name = dir_name, + .len = strlen(dir_name), + .hash = full_name_hash(dir_name, strlen(dir_name)), + }; + + return d_lookup(sb->s_root, &dir); +} +EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); + +void rpc_pipefs_init_net(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_init(&sn->pipefs_sb_lock); +} + +/* + * This call will be used for per network namespace operations calls. + * Note: Function will be returned with pipefs_sb_lock taken if superblock was + * found. This lock have to be released by rpc_put_sb_net() when all operations + * will be completed. + */ +struct super_block *rpc_get_sb_net(const struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_lock(&sn->pipefs_sb_lock); + if (sn->pipefs_sb) + return sn->pipefs_sb; + mutex_unlock(&sn->pipefs_sb_lock); + return NULL; +} +EXPORT_SYMBOL_GPL(rpc_get_sb_net); + +void rpc_put_sb_net(const struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + BUG_ON(sn->pipefs_sb == NULL); + mutex_unlock(&sn->pipefs_sb_lock); +} +EXPORT_SYMBOL_GPL(rpc_put_sb_net); + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct dentry *root; + struct net *net = data; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + int err; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1038,21 +1119,54 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; + dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, + NET_NAME(net)); + err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_MOUNT, + sb); + if (err) + goto err_depopulate; + sb->s_fs_info = get_net(net); + sn->pipefs_sb = sb; return 0; + +err_depopulate: + blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_UMOUNT, + sb); + __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); + return err; } static struct dentry * rpc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, rpc_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); +} + +static void rpc_kill_sb(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_lock(&sn->pipefs_sb_lock); + sn->pipefs_sb = NULL; + mutex_unlock(&sn->pipefs_sb_lock); + put_net(net); + dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, + NET_NAME(net)); + blocking_notifier_call_chain(&rpc_pipefs_notifier_list, + RPC_PIPEFS_UMOUNT, + sb); + kill_litter_super(sb); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", .mount = rpc_mount, - .kill_sb = kill_litter_super, + .kill_sb = rpc_kill_sb, }; static void @@ -1062,16 +1176,8 @@ init_once(void *foo) inode_init_once(&rpci->vfs_inode); rpci->private = NULL; - rpci->nreaders = 0; - rpci->nwriters = 0; - INIT_LIST_HEAD(&rpci->in_upcall); - INIT_LIST_HEAD(&rpci->in_downcall); - INIT_LIST_HEAD(&rpci->pipe); - rpci->pipelen = 0; + rpci->pipe = NULL; init_waitqueue_head(&rpci->waitq); - INIT_DELAYED_WORK(&rpci->queue_timeout, - rpc_timeout_upcall_queue); - rpci->ops = NULL; } int register_rpc_pipefs(void) @@ -1085,17 +1191,24 @@ int register_rpc_pipefs(void) init_once); if (!rpc_inode_cachep) return -ENOMEM; + err = rpc_clients_notifier_register(); + if (err) + goto err_notifier; err = register_filesystem(&rpc_pipe_fs_type); - if (err) { - kmem_cache_destroy(rpc_inode_cachep); - return err; - } - + if (err) + goto err_register; return 0; + +err_register: + rpc_clients_notifier_unregister(); +err_notifier: + kmem_cache_destroy(rpc_inode_cachep); + return err; } void unregister_rpc_pipefs(void) { + rpc_clients_notifier_unregister(); kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 8761bf8e36f..207a74696c9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -23,12 +23,15 @@ #include <linux/errno.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/nsproxy.h> #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> +#include "netns.h" + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND #endif @@ -109,13 +112,7 @@ enum { static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); -static struct rpc_program rpcb_program; - -static struct rpc_clnt * rpcb_local_clnt; -static struct rpc_clnt * rpcb_local_clnt4; - -DEFINE_SPINLOCK(rpcb_clnt_lock); -unsigned int rpcb_users; +static const struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -140,8 +137,8 @@ struct rpcb_info { struct rpc_procinfo * rpc_proc; }; -static struct rpcb_info rpcb_next_version[]; -static struct rpcb_info rpcb_next_version6[]; +static const struct rpcb_info rpcb_next_version[]; +static const struct rpcb_info rpcb_next_version6[]; static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, @@ -164,32 +161,34 @@ static void rpcb_map_release(void *data) kfree(map); } -static int rpcb_get_local(void) +static int rpcb_get_local(struct net *net) { int cnt; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - spin_lock(&rpcb_clnt_lock); - if (rpcb_users) - rpcb_users++; - cnt = rpcb_users; - spin_unlock(&rpcb_clnt_lock); + spin_lock(&sn->rpcb_clnt_lock); + if (sn->rpcb_users) + sn->rpcb_users++; + cnt = sn->rpcb_users; + spin_unlock(&sn->rpcb_clnt_lock); return cnt; } -void rpcb_put_local(void) +void rpcb_put_local(struct net *net) { - struct rpc_clnt *clnt = rpcb_local_clnt; - struct rpc_clnt *clnt4 = rpcb_local_clnt4; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_clnt *clnt = sn->rpcb_local_clnt; + struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; int shutdown; - spin_lock(&rpcb_clnt_lock); - if (--rpcb_users == 0) { - rpcb_local_clnt = NULL; - rpcb_local_clnt4 = NULL; + spin_lock(&sn->rpcb_clnt_lock); + if (--sn->rpcb_users == 0) { + sn->rpcb_local_clnt = NULL; + sn->rpcb_local_clnt4 = NULL; } - shutdown = !rpcb_users; - spin_unlock(&rpcb_clnt_lock); + shutdown = !sn->rpcb_users; + spin_unlock(&sn->rpcb_clnt_lock); if (shutdown) { /* @@ -202,30 +201,34 @@ void rpcb_put_local(void) } } -static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) +static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, + struct rpc_clnt *clnt4) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + /* Protected by rpcb_create_local_mutex */ - rpcb_local_clnt = clnt; - rpcb_local_clnt4 = clnt4; + sn->rpcb_local_clnt = clnt; + sn->rpcb_local_clnt4 = clnt4; smp_wmb(); - rpcb_users = 1; + sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, - rpcb_local_clnt4); + "%p, rpcb_local_clnt4: %p) for net %p%s\n", + sn->rpcb_local_clnt, sn->rpcb_local_clnt4, + net, (net == &init_net) ? " (init_net)" : ""); } /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_unix(void) +static int rpcb_create_local_unix(struct net *net) { static const struct sockaddr_un rpcb_localaddr_rpcbind = { .sun_family = AF_LOCAL, .sun_path = RPCBIND_SOCK_PATHNAME, }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_LOCAL, .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, .addrsize = sizeof(rpcb_localaddr_rpcbind), @@ -258,7 +261,7 @@ static int rpcb_create_local_unix(void) clnt4 = NULL; } - rpcb_set_local(clnt, clnt4); + rpcb_set_local(net, clnt, clnt4); out: return result; @@ -268,7 +271,7 @@ out: * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_net(void) +static int rpcb_create_local_net(struct net *net) { static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_family = AF_INET, @@ -276,7 +279,7 @@ static int rpcb_create_local_net(void) .sin_port = htons(RPCBIND_PORT), }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), @@ -310,7 +313,7 @@ static int rpcb_create_local_net(void) clnt4 = NULL; } - rpcb_set_local(clnt, clnt4); + rpcb_set_local(net, clnt, clnt4); out: return result; @@ -320,31 +323,32 @@ out: * Returns zero on success, otherwise a negative errno value * is returned. */ -int rpcb_create_local(void) +int rpcb_create_local(struct net *net) { static DEFINE_MUTEX(rpcb_create_local_mutex); int result = 0; - if (rpcb_get_local()) + if (rpcb_get_local(net)) return result; mutex_lock(&rpcb_create_local_mutex); - if (rpcb_get_local()) + if (rpcb_get_local(net)) goto out; - if (rpcb_create_local_unix() != 0) - result = rpcb_create_local_net(); + if (rpcb_create_local_unix(net) != 0) + result = rpcb_create_local_net(net); out: mutex_unlock(&rpcb_create_local_mutex); return result; } -static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version) +static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, + struct sockaddr *srvaddr, size_t salen, + int proto, u32 version) { struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = proto, .address = srvaddr, .addrsize = salen, @@ -420,7 +424,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 * addresses). */ -int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) +int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short port) { struct rpcbind_args map = { .r_prog = prog, @@ -431,6 +435,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -440,13 +445,14 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(rpcb_local_clnt, &msg); + return rpcb_register_call(sn->rpcb_local_clnt, &msg); } /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_inet4(const struct sockaddr *sap, +static int rpcb_register_inet4(struct sunrpc_net *sn, + const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; @@ -465,7 +471,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(rpcb_local_clnt4, msg); + result = rpcb_register_call(sn->rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -473,7 +479,8 @@ static int rpcb_register_inet4(const struct sockaddr *sap, /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_inet6(const struct sockaddr *sap, +static int rpcb_register_inet6(struct sunrpc_net *sn, + const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; @@ -492,12 +499,13 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(rpcb_local_clnt4, msg); + result = rpcb_register_call(sn->rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } -static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) +static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, + struct rpc_message *msg) { struct rpcbind_args *map = msg->rpc_argp; @@ -508,7 +516,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(rpcb_local_clnt4, msg); + return rpcb_register_call(sn->rpcb_local_clnt4, msg); } /** @@ -554,7 +562,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) * service on any IPv4 address, but not on IPv6. The latter * advertises the service on all IPv4 and IPv6 addresses. */ -int rpcb_v4_register(const u32 program, const u32 version, +int rpcb_v4_register(struct net *net, const u32 program, const u32 version, const struct sockaddr *address, const char *netid) { struct rpcbind_args map = { @@ -566,18 +574,19 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - if (rpcb_local_clnt4 == NULL) + if (sn->rpcb_local_clnt4 == NULL) return -EPROTONOSUPPORT; if (address == NULL) - return rpcb_unregister_all_protofamilies(&msg); + return rpcb_unregister_all_protofamilies(sn, &msg); switch (address->sa_family) { case AF_INET: - return rpcb_register_inet4(address, &msg); + return rpcb_register_inet4(sn, address, &msg); case AF_INET6: - return rpcb_register_inet6(address, &msg); + return rpcb_register_inet6(sn, address, &msg); } return -EAFNOSUPPORT; @@ -611,9 +620,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) { struct rpc_clnt *parent = clnt->cl_parent; + struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); while (parent != clnt) { - if (parent->cl_xprt != clnt->cl_xprt) + if (rcu_dereference(parent->cl_xprt) != xprt) break; if (clnt->cl_autobind) break; @@ -644,12 +654,16 @@ void rpcb_getport_async(struct rpc_task *task) size_t salen; int status; - clnt = rpcb_find_transport_owner(task->tk_client); - xprt = clnt->cl_xprt; + rcu_read_lock(); + do { + clnt = rpcb_find_transport_owner(task->tk_client); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + } while (xprt == NULL); + rcu_read_unlock(); dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, - clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); + xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ @@ -658,6 +672,7 @@ void rpcb_getport_async(struct rpc_task *task) if (xprt_test_and_set_binding(xprt)) { dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); + xprt_put(xprt); return; } @@ -699,8 +714,8 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version); + rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen, + xprt->prot, bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -725,7 +740,7 @@ void rpcb_getport_async(struct rpc_task *task) switch (bind_version) { case RPCBVERS_4: case RPCBVERS_3: - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; @@ -754,6 +769,7 @@ bailout_release_client: bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); task->tk_status = status; + xprt_put(xprt); } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -801,11 +817,11 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); @@ -818,7 +834,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; unsigned long port; __be32 *p; @@ -829,8 +844,8 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, port); + dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -841,7 +856,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, unsigned int *boolp) { - struct rpc_task *task = req->rq_task; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -853,7 +867,8 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 1; dprintk("RPC: %5u RPCB_%s call %s\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, (*boolp ? "succeeded" : "failed")); return 0; } @@ -873,11 +888,11 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_netid, rpcb->r_addr); @@ -895,7 +910,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; - struct rpc_task *task = req->rq_task; __be32 *p; u32 len; @@ -912,7 +926,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, */ if (len == 0) { dprintk("RPC: %5u RPCB reply: program not registered\n", - task->tk_pid); + req->rq_task->tk_pid); return 0; } @@ -922,10 +936,11 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, (char *)p); + dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, (char *)p); - if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) + if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, + sap, sizeof(address)) == 0) goto out_fail; rpcb->r_port = rpc_get_port(sap); @@ -933,7 +948,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, out_fail: dprintk("RPC: %5u malformed RPCB_%s reply\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name); + req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } @@ -1041,7 +1057,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, }; -static struct rpcb_info rpcb_next_version[] = { +static const struct rpcb_info rpcb_next_version[] = { { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -1051,7 +1067,7 @@ static struct rpcb_info rpcb_next_version[] = { }, }; -static struct rpcb_info rpcb_next_version6[] = { +static const struct rpcb_info rpcb_next_version6[] = { { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -1065,25 +1081,25 @@ static struct rpcb_info rpcb_next_version6[] = { }, }; -static struct rpc_version rpcb_version2 = { +static const struct rpc_version rpcb_version2 = { .number = RPCBVERS_2, .nrprocs = ARRAY_SIZE(rpcb_procedures2), .procs = rpcb_procedures2 }; -static struct rpc_version rpcb_version3 = { +static const struct rpc_version rpcb_version3 = { .number = RPCBVERS_3, .nrprocs = ARRAY_SIZE(rpcb_procedures3), .procs = rpcb_procedures3 }; -static struct rpc_version rpcb_version4 = { +static const struct rpc_version rpcb_version4 = { .number = RPCBVERS_4, .nrprocs = ARRAY_SIZE(rpcb_procedures4), .procs = rpcb_procedures4 }; -static struct rpc_version *rpcb_version[] = { +static const struct rpc_version *rpcb_version[] = { NULL, NULL, &rpcb_version2, @@ -1093,7 +1109,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -static struct rpc_program rpcb_program = { +static const struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3341d896278..994cfea2bad 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -28,6 +28,9 @@ #define RPCDBG_FACILITY RPCDBG_SCHED #endif +#define CREATE_TRACE_POINTS +#include <trace/events/sunrpc.h> + /* * RPC slabs and memory pools */ @@ -205,9 +208,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->qlen = 0; setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); INIT_LIST_HEAD(&queue->timer_list.list); -#ifdef RPC_DEBUG - queue->name = qname; -#endif + rpc_assign_waitqueue_name(queue, qname); } void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) @@ -251,6 +252,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { + trace_rpc_task_begin(task->tk_client, task, NULL); + rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } @@ -267,6 +270,8 @@ static int rpc_complete_task(struct rpc_task *task) unsigned long flags; int ret; + trace_rpc_task_complete(task->tk_client, task, NULL); + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ret = atomic_dec_and_test(&task->tk_count); @@ -324,6 +329,8 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); + trace_rpc_task_sleep(task->tk_client, task, q); + __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); @@ -378,6 +385,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task return; } + trace_rpc_task_wakeup(task->tk_client, task, queue); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -422,7 +431,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the next task on a priority queue. */ -static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) +static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue) { struct list_head *q; struct rpc_task *task; @@ -467,30 +476,54 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - rpc_wake_up_task_queue_locked(queue, task); return task; } +static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) +{ + if (RPC_IS_PRIORITY(queue)) + return __rpc_find_next_queued_priority(queue); + if (!list_empty(&queue->tasks[0])) + return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list); + return NULL; +} + /* - * Wake up the next task on the wait queue. + * Wake up the first task on the wait queue. */ -struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, + bool (*func)(struct rpc_task *, void *), void *data) { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_next(%p \"%s\")\n", + dprintk("RPC: wake_up_first(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); - if (RPC_IS_PRIORITY(queue)) - task = __rpc_wake_up_next_priority(queue); - else { - task_for_first(task, &queue->tasks[0]) + task = __rpc_find_next_queued(queue); + if (task != NULL) { + if (func(task, data)) rpc_wake_up_task_queue_locked(queue, task); + else + task = NULL; } spin_unlock_bh(&queue->lock); return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_first); + +static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) +{ + return true; +} + +/* + * Wake up the next task on the wait queue. +*/ +struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) +{ + return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL); +} EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** @@ -501,14 +534,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -526,13 +563,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } @@ -677,6 +717,7 @@ static void __rpc_execute(struct rpc_task *task) if (do_action == NULL) break; } + trace_rpc_task_run_action(task->tk_client, task, task->tk_action); do_action(task); /* diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 80df89d957b..bc2068ee795 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -22,6 +22,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/metrics.h> +#include <linux/rcupdate.h> #include "netns.h" @@ -133,20 +134,19 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); /** * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task + * @stats: array of stat structures * * Relies on the caller for serialization. */ -void rpc_count_iostats(struct rpc_task *task) +void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats; struct rpc_iostats *op_metrics; ktime_t delta; - if (!task->tk_client || !task->tk_client->cl_metrics || !req) + if (!stats || !req) return; - stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; @@ -164,6 +164,7 @@ void rpc_count_iostats(struct rpc_task *task) delta = ktime_sub(ktime_get(), task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); } +EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs) @@ -179,7 +180,7 @@ static void _print_name(struct seq_file *seq, unsigned int op, void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) { struct rpc_iostats *stats = clnt->cl_metrics; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt; unsigned int op, maxproc = clnt->cl_maxproc; if (!stats) @@ -189,8 +190,11 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) seq_printf(seq, "p/v: %u/%u (%s)\n", clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); if (xprt) xprt->ops->print_stats(xprt, seq); + rcu_read_unlock(); seq_printf(seq, "\tper-op statistics\n"); for (op = 0; op < maxproc; op++) { @@ -213,45 +217,46 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); * Register/unregister RPC proc files */ static inline struct proc_dir_entry * -do_register(const char *name, void *data, const struct file_operations *fops) +do_register(struct net *net, const char *name, void *data, + const struct file_operations *fops) { struct sunrpc_net *sn; dprintk("RPC: registering /proc/net/rpc/%s\n", name); - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); return proc_create_data(name, 0, sn->proc_net_rpc, fops, data); } struct proc_dir_entry * -rpc_proc_register(struct rpc_stat *statp) +rpc_proc_register(struct net *net, struct rpc_stat *statp) { - return do_register(statp->program->name, statp, &rpc_proc_fops); + return do_register(net, statp->program->name, statp, &rpc_proc_fops); } EXPORT_SYMBOL_GPL(rpc_proc_register); void -rpc_proc_unregister(const char *name) +rpc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * -svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) +svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops) { - return do_register(statp->program->pg_name, statp, fops); + return do_register(net, statp->program->pg_name, statp, fops); } EXPORT_SYMBOL_GPL(svc_proc_register); void -svc_proc_unregister(const char *name) +svc_proc_unregister(struct net *net, const char *name) { struct sunrpc_net *sn; - sn = net_generic(&init_net, sunrpc_net_id); + sn = net_generic(net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 90c292e2738..14c9f6d1c5f 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -47,5 +47,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); +int rpc_clients_notifier_register(void); +void rpc_clients_notifier_unregister(void); #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8ec9778c3f4..8adfc88e793 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -25,10 +25,12 @@ #include "netns.h" int sunrpc_net_id; +EXPORT_SYMBOL_GPL(sunrpc_net_id); static __net_init int sunrpc_init_net(struct net *net) { int err; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); err = rpc_proc_init(net); if (err) @@ -38,8 +40,18 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_ipmap; + err = unix_gid_cache_create(net); + if (err) + goto err_unixgid; + + rpc_pipefs_init_net(net); + INIT_LIST_HEAD(&sn->all_clients); + spin_lock_init(&sn->rpc_client_lock); + spin_lock_init(&sn->rpcb_clnt_lock); return 0; +err_unixgid: + ip_map_cache_destroy(net); err_ipmap: rpc_proc_exit(net); err_proc: @@ -48,6 +60,7 @@ err_proc: static __net_exit void sunrpc_exit_net(struct net *net) { + unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); } @@ -59,8 +72,6 @@ static struct pernet_operations sunrpc_net_ops = { .size = sizeof(struct sunrpc_net), }; -extern struct cache_detail unix_gid_cache; - static int __init init_sunrpc(void) { @@ -82,7 +93,6 @@ init_sunrpc(void) #ifdef RPC_DEBUG rpc_register_sysctl(); #endif - cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; @@ -105,7 +115,6 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&unix_gid_cache); unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e4aabc02368..4153846984a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/nsproxy.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -30,7 +31,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP -static void svc_unregister(const struct svc_serv *serv); +static void svc_unregister(const struct svc_serv *serv, struct net *net); #define svc_serv_is_pooled(serv) ((serv)->sv_function) @@ -368,23 +369,24 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) return &serv->sv_pools[pidx % serv->sv_nrpools]; } -static int svc_rpcb_setup(struct svc_serv *serv) +int svc_rpcb_setup(struct svc_serv *serv, struct net *net) { int err; - err = rpcb_create_local(); + err = rpcb_create_local(net); if (err) return err; /* Remove any stale portmap registrations */ - svc_unregister(serv); + svc_unregister(serv, net); return 0; } +EXPORT_SYMBOL_GPL(svc_rpcb_setup); -void svc_rpcb_cleanup(struct svc_serv *serv) +void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) { - svc_unregister(serv); - rpcb_put_local(); + svc_unregister(serv, net); + rpcb_put_local(net); } EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); @@ -410,7 +412,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv, struct net *net)) { struct svc_serv *serv; unsigned int vers; @@ -470,7 +472,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, } if (svc_uses_rpcbind(serv)) { - if (svc_rpcb_setup(serv) < 0) { + if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) { kfree(serv->sv_pools); kfree(serv); return NULL; @@ -484,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv, struct net *net)) { return __svc_create(prog, bufsize, /*npools*/1, shutdown); } @@ -492,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv, struct net *net), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; @@ -509,6 +511,24 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, } EXPORT_SYMBOL_GPL(svc_create_pooled); +void svc_shutdown_net(struct svc_serv *serv, struct net *net) +{ + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ + svc_close_net(serv, net); + + if (serv->sv_shutdown) + serv->sv_shutdown(serv, net); +} +EXPORT_SYMBOL_GPL(svc_shutdown_net); + /* * Destroy an RPC service. Should be called with appropriate locking to * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. @@ -516,6 +536,8 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_destroy(struct svc_serv *serv) { + struct net *net = current->nsproxy->net_ns; + dprintk("svc: svc_destroy(%s, %d)\n", serv->sv_program->pg_name, serv->sv_nrthreads); @@ -529,19 +551,15 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); + + svc_shutdown_net(serv, net); + /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: + * The last user is gone and thus all sockets have to be destroyed to + * the point. Check this. */ - svc_close_all(serv); - - if (serv->sv_shutdown) - serv->sv_shutdown(serv); + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); cache_clean_deferred(serv); @@ -795,7 +813,8 @@ EXPORT_SYMBOL_GPL(svc_exit_thread); * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_rpcb_register4(const u32 program, const u32 version, +static int __svc_rpcb_register4(struct net *net, const u32 program, + const u32 version, const unsigned short protocol, const unsigned short port) { @@ -818,7 +837,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(program, version, + error = rpcb_v4_register(net, program, version, (const struct sockaddr *)&sin, netid); /* @@ -826,7 +845,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, * registration request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(program, version, protocol, port); + error = rpcb_register(net, program, version, protocol, port); return error; } @@ -842,7 +861,8 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_rpcb_register6(const u32 program, const u32 version, +static int __svc_rpcb_register6(struct net *net, const u32 program, + const u32 version, const unsigned short protocol, const unsigned short port) { @@ -865,7 +885,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, return -ENOPROTOOPT; } - error = rpcb_v4_register(program, version, + error = rpcb_v4_register(net, program, version, (const struct sockaddr *)&sin6, netid); /* @@ -885,7 +905,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(const char *progname, +static int __svc_register(struct net *net, const char *progname, const u32 program, const u32 version, const int family, const unsigned short protocol, @@ -895,12 +915,12 @@ static int __svc_register(const char *progname, switch (family) { case PF_INET: - error = __svc_rpcb_register4(program, version, + error = __svc_rpcb_register4(net, program, version, protocol, port); break; #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: - error = __svc_rpcb_register6(program, version, + error = __svc_rpcb_register6(net, program, version, protocol, port); #endif } @@ -914,14 +934,16 @@ static int __svc_register(const char *progname, /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @net: net namespace for the service to register * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const int family, - const unsigned short proto, const unsigned short port) +int svc_register(const struct svc_serv *serv, struct net *net, + const int family, const unsigned short proto, + const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -946,7 +968,7 @@ int svc_register(const struct svc_serv *serv, const int family, if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(progp->pg_name, progp->pg_prog, + error = __svc_register(net, progp->pg_name, progp->pg_prog, i, family, proto, port); if (error < 0) break; @@ -963,19 +985,19 @@ int svc_register(const struct svc_serv *serv, const int family, * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient * in this case to clear all existing entries for [program, version]. */ -static void __svc_unregister(const u32 program, const u32 version, +static void __svc_unregister(struct net *net, const u32 program, const u32 version, const char *progname) { int error; - error = rpcb_v4_register(program, version, NULL, ""); + error = rpcb_v4_register(net, program, version, NULL, ""); /* * User space didn't support rpcbind v4, so retry this * request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(program, version, 0, 0); + error = rpcb_register(net, program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); @@ -989,7 +1011,7 @@ static void __svc_unregister(const u32 program, const u32 version, * The result of unregistration is reported via dprintk for those who want * verification of the result, but is otherwise not important. */ -static void svc_unregister(const struct svc_serv *serv) +static void svc_unregister(const struct svc_serv *serv, struct net *net) { struct svc_program *progp; unsigned long flags; @@ -1006,7 +1028,7 @@ static void svc_unregister(const struct svc_serv *serv) dprintk("svc: attempting to unregister %sv%u\n", progp->pg_name, i); - __svc_unregister(progp->pg_prog, i, progp->pg_name); + __svc_unregister(net, progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 74cb0d8e9ca..4bda09d7e1a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -922,48 +922,65 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; list_for_each_entry(xprt, xprt_list, xpt_list) { + if (xprt->xpt_net != net) + continue; set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags); } } -void svc_close_all(struct svc_serv *serv) +static void svc_clear_pools(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; int i; - svc_close_list(&serv->sv_tempsocks); - svc_close_list(&serv->sv_permsocks); - for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[i]; spin_lock_bh(&pool->sp_lock); - while (!list_empty(&pool->sp_sockets)) { - xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { + if (xprt->xpt_net != net) + continue; list_del_init(&xprt->xpt_ready); } spin_unlock_bh(&pool->sp_lock); } +} + +static void svc_clear_list(struct list_head *xprt_list, struct net *net) +{ + struct svc_xprt *xprt; + struct svc_xprt *tmp; + + list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + if (xprt->xpt_net != net) + continue; + svc_delete_xprt(xprt); + } + list_for_each_entry(xprt, xprt_list, xpt_list) + BUG_ON(xprt->xpt_net == net); +} + +void svc_close_net(struct svc_serv *serv, struct net *net) +{ + svc_close_list(&serv->sv_tempsocks, net); + svc_close_list(&serv->sv_permsocks, net); + + svc_clear_pools(serv, net); /* * At this point the sp_sockets lists will stay empty, since * svc_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) - svc_delete_xprt(xprt); - list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) - svc_delete_xprt(xprt); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); + svc_clear_list(&serv->sv_tempsocks, net); + svc_clear_list(&serv->sv_permsocks, net); } /* @@ -1089,6 +1106,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * svc_find_xprt - find an RPC transport instance * @serv: pointer to svc_serv to search * @xcl_name: C string containing transport's class name + * @net: owner net pointer * @af: Address family of transport's local address * @port: transport's IP port number * @@ -1101,7 +1119,8 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * service's list that has a matching class name. */ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, - const sa_family_t af, const unsigned short port) + struct net *net, const sa_family_t af, + const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; @@ -1112,6 +1131,8 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { + if (xprt->xpt_net != net) + continue; if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 01153ead1db..bcd574f2ac5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -211,7 +211,7 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) + if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; switch (address.sa.sa_family) { case AF_INET: @@ -436,7 +436,6 @@ struct unix_gid { uid_t uid; struct group_info *gi; }; -static struct cache_head *gid_table[GID_HASHMAX]; static void unix_gid_put(struct kref *kref) { @@ -494,8 +493,7 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); } -static struct unix_gid *unix_gid_lookup(uid_t uid); -extern struct cache_detail unix_gid_cache; +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -539,19 +537,19 @@ static int unix_gid_parse(struct cache_detail *cd, GROUP_AT(ug.gi, i) = gid; } - ugp = unix_gid_lookup(uid); + ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; ug.h.flags = 0; ug.h.expiry_time = expiry; - ch = sunrpc_cache_update(&unix_gid_cache, + ch = sunrpc_cache_update(cd, &ug.h, &ugp->h, hash_long(uid, GID_HASHBITS)); if (!ch) err = -ENOMEM; else { err = 0; - cache_put(ch, &unix_gid_cache); + cache_put(ch, cd); } } else err = -ENOMEM; @@ -587,10 +585,9 @@ static int unix_gid_show(struct seq_file *m, return 0; } -struct cache_detail unix_gid_cache = { +static struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, - .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, @@ -602,14 +599,42 @@ struct cache_detail unix_gid_cache = { .alloc = unix_gid_alloc, }; -static struct unix_gid *unix_gid_lookup(uid_t uid) +int unix_gid_cache_create(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; + + cd = cache_create_net(&unix_gid_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); + err = cache_register_net(cd, net); + if (err) { + cache_destroy_net(cd, net); + return err; + } + sn->unix_gid_cache = cd; + return 0; +} + +void unix_gid_cache_destroy(struct net *net) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->unix_gid_cache; + + sn->unix_gid_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); +} + +static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(&unix_gid_cache, &ug.h, - hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); if (ch) return container_of(ch, struct unix_gid, h); else @@ -621,11 +646,13 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) struct unix_gid *ug; struct group_info *gi; int ret; + struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, + sunrpc_net_id); - ug = unix_gid_lookup(uid); + ug = unix_gid_lookup(sn->unix_gid_cache, uid); if (!ug) return ERR_PTR(-EAGAIN); - ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); + ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle); switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); @@ -633,7 +660,7 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); - cache_put(&ug->h, &unix_gid_cache); + cache_put(&ug->h, sn->unix_gid_cache); return gi; default: return ERR_PTR(-EAGAIN); @@ -849,56 +876,45 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; +static struct cache_detail ip_map_cache_template = { + .owner = THIS_MODULE, + .hash_size = IP_HASHMAX, + .name = "auth.unix.ip", + .cache_put = ip_map_put, + .cache_upcall = ip_map_upcall, + .cache_parse = ip_map_parse, + .cache_show = ip_map_show, + .match = ip_map_match, + .init = ip_map_init, + .update = update, + .alloc = ip_map_alloc, +}; + int ip_map_cache_create(struct net *net) { - int err = -ENOMEM; - struct cache_detail *cd; - struct cache_head **tbl; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd; + int err; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = IP_HASHMAX, - cd->hash_table = tbl, - cd->name = "auth.unix.ip", - cd->cache_put = ip_map_put, - cd->cache_upcall = ip_map_upcall, - cd->cache_parse = ip_map_parse, - cd->cache_show = ip_map_show, - cd->match = ip_map_match, - cd->init = ip_map_init, - cd->update = update, - cd->alloc = ip_map_alloc, - + cd = cache_create_net(&ip_map_cache_template, net); + if (IS_ERR(cd)) + return PTR_ERR(cd); err = cache_register_net(cd, net); - if (err) - goto err_reg; - + if (err) { + cache_destroy_net(cd, net); + return err; + } sn->ip_map_cache = cd; return 0; - -err_reg: - kfree(tbl); -err_tbl: - kfree(cd); -err_cd: - return err; } void ip_map_cache_destroy(struct net *net) { - struct sunrpc_net *sn; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct cache_detail *cd = sn->ip_map_cache; - sn = net_generic(net, sunrpc_net_id); - cache_purge(sn->ip_map_cache); - cache_unregister_net(sn->ip_map_cache, net); - kfree(sn->ip_map_cache->hash_table); - kfree(sn->ip_map_cache); + sn->ip_map_cache = NULL; + cache_purge(cd); + cache_unregister_net(cd, net); + cache_destroy_net(cd, net); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 464570906f8..40ae884db86 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -396,7 +396,7 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp, int buflen, unsigned int base) { size_t save_iovlen; - void __user *save_iovbase; + void *save_iovbase; unsigned int i; int ret; @@ -1409,7 +1409,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, + *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family, + inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index e65dcc61333..af7d339add9 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -20,6 +20,8 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> +#include "netns.h" + /* * Declare the debug flags here */ @@ -110,7 +112,7 @@ proc_dodebug(ctl_table *table, int write, *(unsigned int *) table->data = value; /* Display the RPC tasks on writing to rpc_debug */ if (strcmp(table->procname, "rpc_debug") == 0) - rpc_show_tasks(); + rpc_show_tasks(&init_net); } else { if (!access_ok(VERIFY_WRITE, buffer, left)) return -EFAULT; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c64c0ef519b..0cbcd1ab49a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -66,6 +66,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -292,54 +293,57 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return retval; } -static void __xprt_lock_write_next(struct rpc_xprt *xprt) +static bool __xprt_lock_write_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; req->rq_ntrans++; } - return; + return true; +} -out_unlock: +static void __xprt_lock_write_next(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) + return; xprt_clear_locked(xprt); } -static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - if (RPCXPRT_CONGESTED(xprt)) - goto out_unlock; - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; if (req == NULL) { xprt->snd_task = task; - return; + return true; } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; req->rq_bytes_sent = 0; req->rq_ntrans++; - return; + return true; } + return false; +} + +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + if (RPCXPRT_CONGESTED(xprt)) + goto out_unlock; + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) + return; out_unlock: xprt_clear_locked(xprt); } @@ -712,9 +716,7 @@ void xprt_connect(struct rpc_task *task) if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { - if (task->tk_rqstp) - task->tk_rqstp->rq_bytes_sent = 0; - + task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); @@ -750,7 +752,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - task->tk_client->cl_server); + xprt->servername); xprt_release_write(xprt, task); task->tk_status = -EIO; } @@ -884,7 +886,7 @@ void xprt_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int status, numreqs; dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); @@ -921,9 +923,14 @@ void xprt_transmit(struct rpc_task *task) xprt->ops->set_retrans_timeout(task); + numreqs = atomic_read(&xprt->num_reqs); + if (numreqs > xprt->stat.max_slots) + xprt->stat.max_slots = numreqs; xprt->stat.sends++; xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; xprt->stat.bklog_u += xprt->backlog.qlen; + xprt->stat.sending_u += xprt->sending.qlen; + xprt->stat.pending_u += xprt->pending.qlen; /* Don't race with disconnect */ if (!xprt_connected(xprt)) @@ -1131,7 +1138,10 @@ void xprt_release(struct rpc_task *task) return; xprt = req->rq_xprt; - rpc_count_iostats(task); + if (task->tk_ops->rpc_count_stats != NULL) + task->tk_ops->rpc_count_stats(task, task->tk_calldata); + else if (task->tk_client) + rpc_count_iostats(task, task->tk_client->cl_metrics); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) @@ -1220,6 +1230,17 @@ found: (unsigned long)xprt); else init_timer(&xprt->timer); + + if (strlen(args->servername) > RPC_MAXNETNAMELEN) { + xprt_destroy(xprt); + return ERR_PTR(-EINVAL); + } + xprt->servername = kstrdup(args->servername, GFP_KERNEL); + if (xprt->servername == NULL) { + xprt_destroy(xprt); + return ERR_PTR(-ENOMEM); + } + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1242,6 +1263,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); + kfree(xprt->servername); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 1776e5731dc..558fbab574f 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -771,13 +771,18 @@ repost: /* get request object */ req = rpcr_to_rdmar(rqst); + if (req->rl_reply) { + spin_unlock(&xprt->transport_lock); + dprintk("RPC: %s: duplicate reply 0x%p to RPC " + "request 0x%p: xid 0x%08x\n", __func__, rep, req, + headerp->rm_xid); + goto repost; + } dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", __func__, rep, req, rqst, headerp->rm_xid); - BUG_ON(!req || req->rl_reply); - /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 28236bab57f..745973b729a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1490,6 +1490,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, u8 key; int len, pageoff; int i, rc; + int seg_len; + u64 pa; + int page_no; pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -1497,11 +1500,15 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, len = -pageoff; if (*nsegs > RPCRDMA_MAX_DATA_SEGS) *nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < *nsegs;) { + for (page_no = i = 0; i < *nsegs;) { rpcrdma_map_one(ia, seg, writing); - seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; + pa = seg->mr_dma; + for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { + seg1->mr_chunk.rl_mw->r.frmr.fr_pgl-> + page_list[page_no++] = pa; + pa += PAGE_SIZE; + } len += seg->mr_len; - BUG_ON(seg->mr_len > PAGE_SIZE); ++seg; ++i; /* Check for holes */ @@ -1540,9 +1547,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; - frmr_wr.wr.fast_reg.page_list_len = i; + frmr_wr.wr.fast_reg.page_list_len = page_no; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; BUG_ON(frmr_wr.wr.fast_reg.length < len); frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 55472c48825..92bc5181dbe 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -53,12 +53,12 @@ static void xs_close(struct rpc_xprt *xprt); /* * xprtsock tunables */ -unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; -unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; -unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; -unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; +static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; @@ -2227,7 +2227,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " - "%llu %llu\n", + "%llu %llu %lu %llu %llu\n", xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2236,7 +2236,10 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /** @@ -2249,14 +2252,18 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", + seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " + "%lu %llu %llu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /** @@ -2273,7 +2280,8 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; - seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", + seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu %lu %llu %llu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, @@ -2283,7 +2291,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u); + xprt->stat.bklog_u, + xprt->stat.max_slots, + xprt->stat.sending_u, + xprt->stat.pending_u); } /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e75813904f2..c3e65aebecc 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -74,15 +74,13 @@ static struct ctl_table_root net_sysctl_ro_root = { static int __net_init sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, - &net_sysctl_ro_root.default_set, - is_seen); + setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); return 0; } static void __net_exit sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctls.list)); + retire_sysctl_set(&net->sysctls); } static struct pernet_operations sysctl_pernet_ops = { @@ -90,36 +88,32 @@ static struct pernet_operations sysctl_pernet_ops = { .exit = sysctl_net_exit, }; -static __init int sysctl_init(void) +static __init int net_sysctl_init(void) { int ret; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out; - register_sysctl_root(&net_sysctl_root); - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); + setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL); register_sysctl_root(&net_sysctl_ro_root); + register_sysctl_root(&net_sysctl_root); out: return ret; } -subsys_initcall(sysctl_init); +subsys_initcall(net_sysctl_init); struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table) { - struct nsproxy namespaces; - namespaces = *current->nsproxy; - namespaces.net_ns = net; - return __register_sysctl_paths(&net_sysctl_root, - &namespaces, path, table); + return __register_sysctl_paths(&net->sysctls, path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_table); struct ctl_table_header *register_net_sysctl_rotable(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&net_sysctl_ro_root, - &init_nsproxy, path, table); + return __register_sysctl_paths(&net_sysctl_ro_root.default_set, + path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb4277c3318..d510353ef43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2206,7 +2206,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } /* No write status requested, avoid expensive OUT tests. */ - if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT))) + if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) return mask; writable = unix_writable(sk); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 47bacd8c025..95a338c89f9 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -21,7 +21,7 @@ static int xfrm_output2(struct sk_buff *skb); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm_skb_check_space(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) @@ -48,7 +48,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto resume; do { - err = xfrm_state_check_space(x, skb); + err = xfrm_skb_check_space(skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 39e02c54ed2..2f6d11d04a2 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -167,7 +167,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -279,7 +279,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -473,7 +473,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + x->repl->notify(x, XFRM_REPLAY_UPDATE); } static struct xfrm_replay xfrm_replay_legacy = { |