From ef13a262c339544a81e3ba8d15ee345641593da4 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:42 +0800 Subject: tipc: replace config_mutex lock with RTNL lock There have two paths where we can configure or change bearer status: one is that bearer is configured from user space with tipc-config tool; another one is that bearer is changed by notification events from its attached interface. On the first path, one dedicated config_mutex lock is guarded; on the latter path, RTNL lock has been placed to serialize the process of dealing with interface events. So, if RTNL lock is also used to protect the first path, this will not only extremely help us simplify current locking policy, but also config_mutex lock can be deleted as well. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/config.c | 6 ++---- net/tipc/core.h | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/config.c b/net/tipc/config.c index 4b981c053823..251f5a2028e4 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,8 +42,6 @@ #define REPLY_TRUNCATED "\n" -static DEFINE_MUTEX(config_mutex); - static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ @@ -223,7 +221,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - mutex_lock(&config_mutex); + rtnl_lock(); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -337,6 +335,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - mutex_unlock(&config_mutex); + rtnl_unlock(); return rep_tlv_buf; } diff --git a/net/tipc/core.h b/net/tipc/core.h index 8985bbcb942b..36cbf158845f 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -56,7 +56,7 @@ #include #include #include - +#include #define TIPC_MOD_VER "2.0.0" -- cgit v1.2.3 From ca07fb07c9a362149ea72f0de8f7eefd00489ecc Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:43 +0800 Subject: tipc: adjust locking policy of protecting tipc_ptr pointer of net_device Currently the 'tipc_ptr' pointer is protected by tipc_net_lock write lock on write side, and RCU read lock is applied to read side. In addition, there have two paths on write side where we may change variables pointed by the 'tipc_ptr' pointer: one is to configure bearer by tipc-config tool and another one is that bearer status is changed by notification events of its attached interface. But on the latter path, we improperly deem that accessing 'tipc_ptr' pointer happens on read side with rcu_read_lock() although some variables pointed by the 'tipc_ptr' pointer are changed possibly. Moreover, as now the both paths are guarded by RTNL lock, it's better to adjust the locking policy of 'tipc_ptr' pointer protection, allowing RTNL instead of tipc_net_lock write lock to protect it on write side, which will help us purge tipc_net_lock in the future. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3fef7eb776dc..dfb4c7fe4865 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -535,7 +535,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, } rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); + b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; @@ -568,12 +568,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); - if (!b_ptr) { - rcu_read_unlock(); + b_ptr = rtnl_dereference(dev->tipc_ptr); + if (!b_ptr) return NOTIFY_DONE; - } b_ptr->mtu = dev->mtu; @@ -595,8 +592,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_disable_bearer(b_ptr->name); break; } - rcu_read_unlock(); - return NOTIFY_OK; } -- cgit v1.2.3 From f97e455abf0d83b7d69da295163db18e3ebb4d8b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:44 +0800 Subject: tipc: use RTNL lock to protect tipc_net_stop routine As the tipc network initialization(ie, tipc_net_start routine) is under RTNL protection, its corresponding deinitialization part(ie, tipc_net_stop routine) should be protected by RTNL too. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/net.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/net.c b/net/tipc/net.c index 4c564eb69e1a..24d2d21266a4 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -195,11 +195,13 @@ void tipc_net_stop(void) return; tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); + rtnl_lock(); write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_bclink_stop(); tipc_node_stop(); write_unlock_bh(&tipc_net_lock); + rtnl_unlock(); pr_info("Left network mode\n"); } -- cgit v1.2.3 From f8322dfce5766c8e26d9224cbcaf6fdc0b2eb04d Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:45 +0800 Subject: tipc: convert bearer_list to RCU list Convert bearer_list to RCU list. It's protected by RTNL lock on update side, and RCU read lock is applied to read side. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bcast.c | 9 ++++++--- net/tipc/bearer.c | 18 +++++++++--------- net/tipc/bearer.h | 4 +++- 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 95ab5ef92920..223a19929024 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -659,6 +659,7 @@ void tipc_bcbearer_sort(void) { struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; + struct tipc_bearer *b; int b_index; int pri; @@ -667,8 +668,9 @@ void tipc_bcbearer_sort(void) /* Group bearers by priority (can assume max of two per priority) */ memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = bearer_list[b_index]; + b = rcu_dereference_rtnl(bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -677,6 +679,7 @@ void tipc_bcbearer_sort(void) else bp_temp[b->priority].secondary = b; } + rcu_read_unlock(); /* Create array of bearer pairs for broadcasting */ bp_curr = bcbearer->bpairs; @@ -784,7 +787,7 @@ void tipc_bclink_init(void) bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->b_ptr = &bcbearer->bearer; - bearer_list[BCBEARER] = &bcbearer->bearer; + rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); } @@ -795,7 +798,7 @@ void tipc_bclink_stop(void) tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); - bearer_list[BCBEARER] = NULL; + RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); memset(bclink, 0, sizeof(*bclink)); memset(bcbearer, 0, sizeof(*bcbearer)); } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index dfb4c7fe4865..65b17639e43d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,7 +49,7 @@ static struct tipc_media * const media_info_array[] = { NULL }; -struct tipc_bearer *bearer_list[MAX_BEARERS + 1]; +struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -178,7 +178,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -201,7 +201,7 @@ struct sk_buff *tipc_bearer_get_names(void) read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = bearer_list[j]; + b = rtnl_dereference(bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -287,7 +287,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -344,7 +344,7 @@ restart: goto exit; } - bearer_list[bearer_id] = b_ptr; + rcu_assign_pointer(bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -385,12 +385,12 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == bearer_list[i]) { - bearer_list[i] = NULL; + if (b_ptr == rtnl_dereference(bearer_list[i])) { + RCU_INIT_POINTER(bearer_list[i], NULL); break; } } - kfree(b_ptr); + kfree_rcu(b_ptr, rcu); } int tipc_disable_bearer(const char *name) @@ -628,7 +628,7 @@ void tipc_bearer_stop(void) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr) { bearer_disable(b_ptr, true); bearer_list[i] = NULL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ba48145e871d..b67b7ea4cc36 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -113,6 +113,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -133,6 +134,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct rcu_head rcu; u32 priority; u32 window; u32 tolerance; @@ -150,7 +152,7 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer *bearer_list[]; +extern struct tipc_bearer __rcu *bearer_list[]; /* * TIPC routines available to supported media types -- cgit v1.2.3 From 7a2f7d18e79b09c5c5a65fb1fa0e31ad046b3116 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:46 +0800 Subject: tipc: decouple the relationship between bearer and link Currently on both paths of message transmission and reception, the read lock of tipc_net_lock must be held before bearer is accessed, while the write lock of tipc_net_lock has to be taken before bearer is configured. Although it can ensure that bearer is always valid on the two data paths, link and bearer is closely bound together. So as the part of effort of removing tipc_net_lock, the locking policy of bearer protection will be adjusted as below: on the two data paths, RCU is used, and on the configuration path of bearer, RTNL lock is applied. Now RCU just covers the path of message reception. To make it possible to protect the path of message transmission with RCU, link should not use its stored bearer pointer to access bearer, but it should use the bearer identity of its attached bearer as index to get bearer instance from bearer_list array, which can help us decouple the relationship between bearer and link. As a result, bearer on the path of message transmission can be safely protected by RCU when we access bearer_list array within RCU lock protection. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bcast.c | 8 ++++---- net/tipc/bearer.c | 40 ++++++++++++++++++++++++++++++---------- net/tipc/bearer.h | 6 +++--- net/tipc/discover.c | 17 ++++++++++------- net/tipc/link.c | 49 +++++++++++++++++++++++++++++++++---------------- net/tipc/link.h | 6 ++++-- net/tipc/node.c | 8 ++++---- 7 files changed, 88 insertions(+), 46 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 223a19929024..51dab96ddd5f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -321,7 +321,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) : n_ptr->bclink.last_sent); spin_lock_bh(&bc_lock); - tipc_bearer_send(&bcbearer->bearer, buf, NULL); + tipc_bearer_send(MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; spin_unlock_bh(&bc_lock); kfree_skb(buf); @@ -627,13 +627,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b, buf, &b->bcast_addr); + tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b, tbuf, &b->bcast_addr); + tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } @@ -786,7 +786,7 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->b_ptr = &bcbearer->bearer; + bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b17639e43d..e0625e9e2c72 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -215,18 +215,32 @@ struct sk_buff *tipc_bearer_get_names(void) return buf; } -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(u32 bearer_id, u32 dest) { - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_add_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_nmap_add(&b_ptr->nodes, dest); + tipc_bcbearer_sort(); + tipc_disc_add_dest(b_ptr->link_req); + } + rcu_read_unlock(); } -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_remove_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_nmap_remove(&b_ptr->nodes, dest); + tipc_bcbearer_sort(); + tipc_disc_remove_dest(b_ptr->link_req); + } + rcu_read_unlock(); } /** @@ -507,10 +521,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! */ -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { - b->media->send_msg(buf, b, dest); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (likely(b_ptr)) + b_ptr->media->send_msg(buf, b_ptr, dest); + rcu_read_unlock(); } /** diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index b67b7ea4cc36..2fa86bd67c0a 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -183,14 +183,14 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_add_dest(u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 542fe3413dc4..3a8f211f08c7 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -46,8 +46,9 @@ /** * struct tipc_link_req - information about an ongoing link setup request - * @bearer: bearer issuing requests + * @bearer_id: identity of bearer issuing requests * @dest: destination address for request messages + * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent @@ -55,8 +56,9 @@ * @timer_intv: current interval between requests (in ms) */ struct tipc_link_req { - struct tipc_bearer *bearer; + u32 bearer_id; struct tipc_media_addr dest; + u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; @@ -241,7 +243,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) if ((type == DSC_REQ_MSG) && !link_fully_up) { rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); if (rbuf) { - tipc_bearer_send(b_ptr, rbuf, &media_addr); + tipc_bearer_send(b_ptr->identity, rbuf, &media_addr); kfree_skb(rbuf); } } @@ -303,7 +305,7 @@ static void disc_timeout(struct tipc_link_req *req) spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->bearer->domain) && req->num_nodes) { + if (tipc_node(req->domain) && req->num_nodes) { req->timer_intv = TIPC_LINK_REQ_INACTIVE; goto exit; } @@ -315,7 +317,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -354,14 +356,15 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) } memcpy(&req->dest, dest, sizeof(*dest)); - req->bearer = b_ptr; + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); return 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index c5190ab75290..229d478494b9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -101,9 +101,18 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_bearer *b_ptr; u32 max_pkt; - max_pkt = (l_ptr->b_ptr->mtu & ~3); + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (!b_ptr) { + rcu_read_unlock(); + return; + } + max_pkt = (b_ptr->mtu & ~3); + rcu_read_unlock(); + if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -248,7 +257,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->peer_session = INVALID_SESSION; - l_ptr->b_ptr = b_ptr; + l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -263,6 +272,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); + l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; @@ -426,7 +436,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -477,7 +487,7 @@ static void link_activate(struct tipc_link *l_ptr) { l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); } /** @@ -777,7 +787,7 @@ int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) if (likely(!link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return dsz; } @@ -941,7 +951,7 @@ static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, if (likely(!link_congested(l_ptr))) { if (likely(msg_size(msg) <= l_ptr->max_pkt)) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return res; @@ -1204,7 +1214,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; @@ -1216,7 +1226,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); l_ptr->proto_msg_queue = NULL; @@ -1233,7 +1243,8 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (mod(next - first) < l_ptr->queue_limit[0]) { msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, + &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) msg_set_type(msg, CLOSED_MSG); l_ptr->next_out = buf->next; @@ -1352,7 +1363,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); buf = buf->next; retransmits--; l_ptr->stats.retransmitted++; @@ -1440,7 +1451,7 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain - * @tb_ptr: pointer to bearer message arrived on + * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. @@ -1752,7 +1763,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, /* Create protocol message with "out-of-sequence" sequence number */ msg_set_type(msg, msg_typ); - msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); + msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); @@ -1818,7 +1829,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1843,9 +1854,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) /* record unnumbered packet arrival (force mismatch on next timeout) */ l_ptr->checkpoint--; - if (l_ptr->b_ptr->net_plane != msg_net_plane(msg)) + if (l_ptr->net_plane != msg_net_plane(msg)) if (tipc_own_addr > msg_prevnode(msg)) - l_ptr->b_ptr->net_plane = msg_net_plane(msg); + l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -2793,7 +2804,13 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) static void link_print(struct tipc_link *l_ptr, const char *str) { - pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (b_ptr) + pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); + rcu_read_unlock(); if (link_working_unknown(l_ptr)) pr_cont(":WU\n"); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8c0b49b5b2ee..4b556c181bae 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -107,7 +107,7 @@ struct tipc_stats { * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint - * @b_ptr: pointer to bearer used by link + * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link @@ -116,6 +116,7 @@ struct tipc_stats { * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') * @queue_limit: outbound message queue congestion thresholds (indexed by user) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset @@ -155,7 +156,7 @@ struct tipc_link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct tipc_bearer *b_ptr; + u32 bearer_id; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -167,6 +168,7 @@ struct tipc_link { } proto_msg; struct tipc_msg *pmsg; u32 priority; + char net_plane; u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 1d3a4999a70f..fa6823f6457a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -148,7 +148,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->working_links++; pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -208,11 +208,11 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); return; } pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -239,7 +239,7 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + n_ptr->links[l_ptr->bearer_id] = l_ptr; spin_lock_bh(&node_list_lock); tipc_num_links++; spin_unlock_bh(&node_list_lock); -- cgit v1.2.3 From 2231c5af451e4b7ae3cc56eaa4653af6ede51109 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:47 +0800 Subject: tipc: use RCU to protect media_ptr pointer Now the media_ptr pointer is protected with tipc_net_lock write lock on write side; tipc_net_lock read lock is used to read side. As the part of effort of eliminating tipc_net_lock, we decide to adjust the locking policy of media_ptr pointer protection: on write side, RTNL lock is use while on read side RCU read lock is applied. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 13 ++++++++++--- net/tipc/bearer.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e0625e9e2c72..c24a35114fd7 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -458,7 +458,7 @@ int tipc_enable_l2_media(struct tipc_bearer *b) return -ENODEV; /* Associate TIPC bearer with Ethernet bearer */ - b->media_ptr = dev; + rcu_assign_pointer(b->media_ptr, dev); memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -477,7 +477,10 @@ int tipc_enable_l2_media(struct tipc_bearer *b) */ void tipc_disable_l2_media(struct tipc_bearer *b) { - struct net_device *dev = (struct net_device *)b->media_ptr; + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); dev_put(dev); } @@ -492,8 +495,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; + struct net_device *dev; int delta; - struct net_device *dev = (struct net_device *)b->media_ptr; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; clone = skb_clone(buf, GFP_ATOMIC); if (!clone) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2fa86bd67c0a..a983b3005e71 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -128,7 +128,7 @@ struct tipc_media { * care of initializing all other fields. */ struct tipc_bearer { - void *media_ptr; /* initalized by media */ + void __rcu *media_ptr; /* initalized by media */ u32 mtu; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; -- cgit v1.2.3 From 7216cd949c9bd56a4ccd952c624ab68f8c9aa0a4 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:48 +0800 Subject: tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bcast.c | 6 ++---- net/tipc/bearer.c | 31 +++++++++------------------ net/tipc/link.c | 40 ++++++---------------------------- net/tipc/name_distr.c | 2 -- net/tipc/net.c | 59 +++++++++++++++++++++------------------------------ net/tipc/net.h | 2 -- net/tipc/node.c | 2 -- 7 files changed, 42 insertions(+), 100 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 51dab96ddd5f..0f32226db483 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -273,7 +273,7 @@ exit: /** * tipc_bclink_update_link_state - update broadcast link state * - * tipc_net_lock and node lock set + * RCU and node lock set */ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { @@ -335,8 +335,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. - * - * Only tipc_net_lock set. */ static void bclink_peek_nack(struct tipc_msg *msg) { @@ -408,7 +406,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) /** * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards * - * tipc_net_lock is read_locked, no other locks set + * RCU is locked, no other locks set */ void tipc_bclink_rcv(struct sk_buff *buf) { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index c24a35114fd7..1bd96eb465e1 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -198,7 +198,6 @@ struct sk_buff *tipc_bearer_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { b = rtnl_dereference(bearer_list[j]); @@ -211,7 +210,6 @@ struct sk_buff *tipc_bearer_get_names(void) } } } - read_unlock_bh(&tipc_net_lock); return buf; } @@ -285,13 +283,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } - write_lock_bh(&tipc_net_lock); - m_ptr = tipc_media_find(b_names.media_name); if (!m_ptr) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); - goto exit; + return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) @@ -309,14 +305,14 @@ restart: if (!strcmp(name, b_ptr->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); - goto exit; + return -EINVAL; } if ((b_ptr->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", name); - goto exit; + return -EINVAL; } pr_warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); @@ -326,21 +322,20 @@ restart: if (bearer_id >= MAX_BEARERS) { pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", name, MAX_BEARERS); - goto exit; + return -EINVAL; } b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) { - res = -ENOMEM; - goto exit; - } + if (!b_ptr) + return -ENOMEM; + strcpy(b_ptr->name, name); b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); - goto exit; + return -EINVAL; } b_ptr->identity = bearer_id; @@ -355,7 +350,7 @@ restart: bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); - goto exit; + return -EINVAL; } rcu_assign_pointer(bearer_list[bearer_id], b_ptr); @@ -363,8 +358,6 @@ restart: pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); -exit: - write_unlock_bh(&tipc_net_lock); return res; } @@ -373,19 +366,17 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); tipc_disc_delete(b_ptr->link_req); tipc_link_reset_list(b_ptr->identity); tipc_disc_create(b_ptr, &b_ptr->bcast_addr); - read_unlock_bh(&tipc_net_lock); return 0; } /** * bearer_disable * - * Note: This routine assumes caller holds tipc_net_lock. + * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { @@ -412,7 +403,6 @@ int tipc_disable_bearer(const char *name) struct tipc_bearer *b_ptr; int res; - write_lock_bh(&tipc_net_lock); b_ptr = tipc_bearer_find(name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); @@ -421,7 +411,6 @@ int tipc_disable_bearer(const char *name) bearer_disable(b_ptr, false); res = 0; } - write_unlock_bh(&tipc_net_lock); return res; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 229d478494b9..c723ee90219d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -835,7 +835,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) struct tipc_node *n_ptr; int res = -ELINKCONG; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -848,7 +847,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) } else { kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); return res; } @@ -912,7 +910,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) if (list_empty(message_list)) return; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -927,7 +924,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); /* discard the messages if they couldn't be sent */ list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { @@ -989,7 +985,6 @@ again: if (unlikely(res < 0)) return res; - read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); @@ -1000,7 +995,6 @@ again: &sender->max_pkt); exit: tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return res; } @@ -1017,7 +1011,6 @@ exit: */ sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) @@ -1028,7 +1021,6 @@ exit: } tipc_node_unlock(node); } - read_unlock_bh(&tipc_net_lock); /* Couldn't find a link to the destination node */ kfree_skb(buf); @@ -1273,12 +1265,9 @@ static void link_reset_all(unsigned long addr) char addr_string[16]; u32 i; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) { - read_unlock_bh(&tipc_net_lock); + if (!n_ptr) return; /* node no longer exists */ - } tipc_node_lock(n_ptr); @@ -1293,7 +1282,6 @@ static void link_reset_all(unsigned long addr) } tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); } static void link_retransmit_failure(struct tipc_link *l_ptr, @@ -1458,7 +1446,6 @@ static int link_recv_buf_validate(struct sk_buff *buf) */ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); while (head) { struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1646,7 +1633,6 @@ unlock_discard: discard: kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); } /** @@ -2408,8 +2394,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) /* tipc_link_find_owner - locate owner node of link by link's name * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. - * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; - * this also prevents link deletion. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ @@ -2471,7 +2455,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) * - * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. + * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. * * Returns 0 if value updated and negative value on error. */ @@ -2577,9 +2561,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space " (cannot change setting on broadcast link)"); } - read_lock_bh(&tipc_net_lock); res = link_cmd_set_value(args->name, new_value, cmd); - read_unlock_bh(&tipc_net_lock); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2613,22 +2595,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return tipc_cfg_reply_error_string("link not found"); - } + tipc_node_lock(node); l_ptr = node->links[bearer_id]; if (!l_ptr) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } link_reset_statistics(l_ptr); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } @@ -2661,18 +2639,15 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return 0; - } + tipc_node_lock(node); l = node->links[bearer_id]; if (!l) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return 0; } @@ -2738,7 +2713,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) (s->accu_queue_sz / s->queue_sz_counts) : 0); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return ret; } @@ -2789,7 +2763,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) if (dest == tipc_own_addr) return MAX_MSG_SIZE; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -2798,7 +2771,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) res = l_ptr->max_pkt; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); return res; } diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index aff8041dc157..36a72822601c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -248,7 +248,6 @@ void tipc_named_node_up(unsigned long nodearg) u32 max_item_buf = 0; /* compute maximum amount of publication data to send per message */ - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(node); if (n_ptr) { tipc_node_lock(n_ptr); @@ -258,7 +257,6 @@ void tipc_named_node_up(unsigned long nodearg) ITEM_SIZE) * ITEM_SIZE; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); if (!max_item_buf) return; diff --git a/net/tipc/net.c b/net/tipc/net.c index 24d2d21266a4..75bb39025d53 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -45,39 +45,34 @@ /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual - * port and node/link instances. The code consists of three major + * port and node/link instances. The code consists of four major * locking domains, each protected with their own disjunct set of locks. * - * 1: The routing hierarchy. - * Comprises the structures 'zone', 'cluster', 'node', 'link' - * and 'bearer'. The whole hierarchy is protected by a big - * read/write lock, tipc_net_lock, to enssure that nothing is added - * or removed while code is accessing any of these structures. - * This layer must not be called from the two others while they - * hold any of their own locks. - * Neither must it itself do any upcalls to the other two before - * it has released tipc_net_lock and other protective locks. + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. * - * Within the tipc_net_lock domain there are two sub-domains;'node' and - * 'bearer', where local write operations are permitted, - * provided that those are protected by individual spin_locks - * per instance. Code holding tipc_net_lock(read) and a node spin_lock - * is permitted to poke around in both the node itself and its - * subordinate links. I.e, it can update link counters and queues, - * change link state, send protocol messages, and alter the - * "active_links" array in the node; but it can _not_ remove a link - * or a node from the overall structure. - * Correspondingly, individual bearers may change status within a - * tipc_net_lock(read), protected by an individual spin_lock ber bearer - * instance, but it needs tipc_net_lock(write) to remove/add any bearers. + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. Especially node + * instance is destroyed only when TIPC module is removed, and we can + * confirm that there has no any user who is accessing the node at the + * moment. Therefore, Except for iterating the two lists within RCU + * protection, it's no needed to hold RCU that we access node instance + * in other places. * + * In addition, all members in node structure including link instances + * are protected by node spin lock. * - * 2: The transport level of the protocol. - * This consists of the structures port, (and its user level - * representations, such as user_port and tipc_sock), reference and - * tipc_user (port.c, reg.c, socket.c). + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). * - * This layer has four different locks: + * This layer has four different locks: * - The tipc_port spin_lock. This is protecting each port instance * from parallel data access and removal. Since we can not place * this lock in the port itself, it has been placed in the @@ -96,7 +91,7 @@ * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * - * 3: The name table (name_table.c, name_distr.c, subscription.c) + * 4: The name table (name_table.c, name_distr.c, subscription.c) * - There is one big read/write-lock (tipc_nametbl_lock) protecting the * overall name table structure. Nothing must be added/removed to * this structure without holding write access to it. @@ -108,8 +103,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -DEFINE_RWLOCK(tipc_net_lock); - static void net_route_named_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -175,15 +168,13 @@ void tipc_net_start(u32 addr) { char addr_string[16]; - write_lock_bh(&tipc_net_lock); tipc_own_addr = addr; tipc_named_reinit(); tipc_port_reinit(); tipc_bclink_init(); - write_unlock_bh(&tipc_net_lock); - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); + pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); @@ -196,11 +187,9 @@ void tipc_net_stop(void) tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); rtnl_lock(); - write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_bclink_stop(); tipc_node_stop(); - write_unlock_bh(&tipc_net_lock); rtnl_unlock(); pr_info("Left network mode\n"); diff --git a/net/tipc/net.h b/net/tipc/net.h index 079daadb3f72..f781cae8df4b 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern rwlock_t tipc_net_lock; - void tipc_net_route_msg(struct sk_buff *buf); void tipc_net_start(u32 addr); diff --git a/net/tipc/node.c b/net/tipc/node.c index fa6823f6457a..be90115cda1a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -273,14 +273,12 @@ static void node_name_purge_complete(unsigned long node_addr) { struct tipc_node *n_ptr; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(node_addr); if (n_ptr) { tipc_node_lock(n_ptr); n_ptr->block_setup &= ~WAIT_NAMES_GONE; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); } static void node_lost_contact(struct tipc_node *n_ptr) -- cgit v1.2.3 From f1c8d8cb82113ea6f41d2774127d3d08a4ca8d46 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:49 +0800 Subject: tipc: make media_ptr pointed netdevice valid The 'media_ptr' pointer in bearer structure which points to network device, is protected by RCU. So, before netdevice is released, synchronize_net() should be involved to prevent no any user of the netdevice on read side from accessing it after it is freed. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 1bd96eb465e1..402e99472a63 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -471,6 +471,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) dev = (struct net_device *)rtnl_dereference(b->media_ptr); RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); dev_put(dev); } -- cgit v1.2.3 From 4ae88c94d3b52dc47c6ad9352991219862f703f9 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:50 +0800 Subject: tipc: use bearer_disable to disable bearer in tipc_l2_device_event As bearer pointer is known in tipc_l2_device_event(), it's unnecessary to search it again in tipc_disable_bearer(). If tipc_disable_bearer() is replaced with bearer_disable() in tipc_l2_device_event(), this will help us save a bit time when bearer is disabled. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 402e99472a63..44fd22a0c070 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -606,7 +606,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - tipc_disable_bearer(b_ptr->name); + bearer_disable(b_ptr, false); break; } return NOTIFY_OK; -- cgit v1.2.3 From 28dd94187afd660a350d01d6bad4a915a6d570b8 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:51 +0800 Subject: tipc: use bc_lock to protect node map in bearer structure The node map variable - 'nodes' in bearer structure is only used by bclink. When bclink accesses it, bc_lock is held. But when change it, for instance, in tipc_bearer_add_dest() or tipc_bearer_remove_dest() the bc_lock is not taken at all. To avoid any inconsistent data, we should always grab bc_lock while accessing node map variable. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bcast.c | 14 ++++++++++---- net/tipc/bcast.h | 5 +---- net/tipc/bearer.c | 6 ++---- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 0f32226db483..119a59b4bec6 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -112,6 +112,8 @@ const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); static u32 bcbuf_acks(struct sk_buff *buf) { @@ -653,7 +655,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(void) +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; @@ -663,6 +665,11 @@ void tipc_bcbearer_sort(void) spin_lock_bh(&bc_lock); + if (action) + tipc_nmap_add(nm_ptr, node); + else + tipc_nmap_remove(nm_ptr, node); + /* Group bearers by priority (can assume max of two per priority) */ memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); @@ -801,11 +808,10 @@ void tipc_bclink_stop(void) memset(bcbearer, 0, sizeof(*bcbearer)); } - /** * tipc_nmap_add - add a node to a node map */ -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -820,7 +826,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) /** * tipc_nmap_remove - remove a node from a node map */ -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a80ef54b818e..7c1ef1b3d7b3 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -69,9 +69,6 @@ struct tipc_node; extern const char tipc_bclink_name[]; -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - /** * tipc_nmap_equal - test for equality of node maps */ @@ -98,6 +95,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(void); +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 44fd22a0c070..3abd9702b887 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -220,8 +220,7 @@ void tipc_bearer_add_dest(u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); if (b_ptr) { - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); + tipc_bcbearer_sort(&b_ptr->nodes, dest, true); tipc_disc_add_dest(b_ptr->link_req); } rcu_read_unlock(); @@ -234,8 +233,7 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); if (b_ptr) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); + tipc_bcbearer_sort(&b_ptr->nodes, dest, false); tipc_disc_remove_dest(b_ptr->link_req); } rcu_read_unlock(); -- cgit v1.2.3 From a8b9b96e959f3c035af20b1bd2ba67b0b7269b19 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 21 Apr 2014 10:55:52 +0800 Subject: tipc: fix race in disc create/delete Commit a21a584d6720ce349b05795b9bcfab3de8e58419 (tipc: fix neighbor detection problem after hw address change) introduces a race condition involving tipc_disc_delete() and tipc_disc_add/remove_dest that can cause TIPC to dereference the pointer to the bearer discovery request structure after it has been freed since a stray pointer is left in the bearer structure. In order to fix the issue, the process of resetting the discovery request handler is optimized: the discovery request handler and request buffer are just reset instead of being freed, allocated and initialized. As the request point is always valid and the request's lock is taken while the request handler is reset, the race doesn't happen any more. Reported-by: Erik Hugne Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 3 +-- net/tipc/discover.c | 53 +++++++++++++++++++++++++++++++++++------------------ net/tipc/discover.h | 1 + 3 files changed, 37 insertions(+), 20 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3abd9702b887..f3259d4133b6 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -365,9 +365,8 @@ restart: static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_disc_delete(b_ptr->link_req); tipc_link_reset_list(b_ptr->identity); - tipc_disc_create(b_ptr, &b_ptr->bcast_addr); + tipc_disc_reset(b_ptr); return 0; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 3a8f211f08c7..ada42e436f5e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -71,22 +71,19 @@ struct tipc_link_req { * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr) +static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, + struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); - } - return buf; + msg = buf_msg(buf); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); + msg_set_node_sig(msg, tipc_random); + msg_set_dest_domain(msg, dest_domain); + msg_set_bc_netid(msg, tipc_net_id); + b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); } /** @@ -241,8 +238,9 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) link_fully_up = link_working_working(link); if ((type == DSC_REQ_MSG) && !link_fully_up) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); + rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { + tipc_disc_init_msg(rbuf, DSC_RESP_MSG, b_ptr); tipc_bearer_send(b_ptr->identity, rbuf, &media_addr); kfree_skb(rbuf); } @@ -349,12 +347,11 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) if (!req) return -ENOMEM; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr); - if (!req->buf) { - kfree(req); - return -ENOMSG; - } + req->buf = tipc_buf_acquire(INT_H_SIZE); + if (!req->buf) + return -ENOMEM; + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; @@ -379,3 +376,23 @@ void tipc_disc_delete(struct tipc_link_req *req) kfree_skb(req->buf); kfree(req); } + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @b_ptr: ptr to bearer issuing requests + * @dest_domain: network domain to which links can be established + */ +void tipc_disc_reset(struct tipc_bearer *b_ptr) +{ + struct tipc_link_req *req = b_ptr->link_req; + + spin_lock_bh(&req->lock); + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + k_start_timer(&req->timer, req->timer_intv); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + spin_unlock_bh(&req->lock); +} diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 07f34729459d..515b57392f4d 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -41,6 +41,7 @@ struct tipc_link_req; int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_reset(struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); -- cgit v1.2.3 From a89778d8baf19cd7e728d81121a294a06cedaad1 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 24 Apr 2014 16:26:46 +0200 Subject: tipc: add support for link state subscriptions When links are established over a bearer plane, we create a node local publication containing information about the peer node and bearer plane. This allows TIPC applications to use the standard TIPC topology server subscription mechanism to get notifications when a link goes up or down. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 1 + net/tipc/node.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 852373d27dbb..53cd7902d34e 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -87,6 +87,7 @@ static inline unsigned int tipc_node(__u32 addr) #define TIPC_CFG_SRV 0 /* configuration service name type */ #define TIPC_TOP_SRV 1 /* topology service name type */ +#define TIPC_LINK_STATE 2 /* link state name type */ #define TIPC_RESERVED_TYPES 64 /* lowest user-publishable name type */ /* diff --git a/net/tipc/node.c b/net/tipc/node.c index be90115cda1a..3b86a74cb31f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -144,9 +144,11 @@ void tipc_node_stop(void) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; + u32 addr = n_ptr->addr; n_ptr->working_links++; - + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, + l_ptr->bearer_id, addr); pr_info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->net_plane); @@ -203,8 +205,10 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; + u32 addr = n_ptr->addr; n_ptr->working_links--; + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", -- cgit v1.2.3 From 78acb1f9b898e85fa2c1e28e700b54b66b288e8d Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 24 Apr 2014 16:26:47 +0200 Subject: tipc: add ioctl to fetch link names We add a new ioctl for AF_TIPC that can be used to fetch the logical name for a link to a remote node on a given bearer. This should be used in combination with link state subscriptions. The logical name size limit definitions are moved to tipc.h, as they are now also needed by the new ioctl. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 22 ++++++++++++++++++++++ include/uapi/linux/tipc_config.h | 10 +--------- net/tipc/node.c | 27 +++++++++++++++++++++++++++ net/tipc/node.h | 1 + net/tipc/socket.c | 29 ++++++++++++++++++++++++++--- 5 files changed, 77 insertions(+), 12 deletions(-) (limited to 'net/tipc') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 53cd7902d34e..6f71b9b41595 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -38,6 +38,7 @@ #define _LINUX_TIPC_H_ #include +#include /* * TIPC addressing primitives @@ -207,4 +208,25 @@ struct sockaddr_tipc { #define TIPC_NODE_RECVQ_DEPTH 131 /* Default: none (read only) */ #define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ +/* + * Maximum sizes of TIPC bearer-related names (including terminating NULL) + * The string formatting for each name element is: + * media: media + * interface: media:interface name + * link: Z.C.N:interface-Z.C.N:interface + * + */ + +#define TIPC_MAX_MEDIA_NAME 16 +#define TIPC_MAX_IF_NAME 16 +#define TIPC_MAX_BEARER_NAME 32 +#define TIPC_MAX_LINK_NAME 60 + +#define SIOCGETLINKNAME SIOCPROTOPRIVATE + +struct tipc_sioc_ln_req { + __u32 peer; + __u32 bearer_id; + char linkname[TIPC_MAX_LINK_NAME]; +}; #endif diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 6b0bff09b3a7..41a76acbb305 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -39,6 +39,7 @@ #include #include +#include #include #ifndef __KERNEL__ @@ -154,15 +155,6 @@ #define TIPC_TLV_NAME_TBL_QUERY 25 /* struct tipc_name_table_query */ #define TIPC_TLV_PORT_REF 26 /* 32-bit port reference */ -/* - * Maximum sizes of TIPC bearer-related names (including terminating NUL) - */ - -#define TIPC_MAX_MEDIA_NAME 16 /* format = media */ -#define TIPC_MAX_IF_NAME 16 /* format = interface */ -#define TIPC_MAX_BEARER_NAME 32 /* format = media:interface */ -#define TIPC_MAX_LINK_NAME 60 /* format = Z.C.N:interface-Z.C.N:interface */ - /* * Link priority limits (min, default, max, media default) */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 3b86a74cb31f..1f938f3dba4b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -438,3 +438,30 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) rcu_read_unlock(); return buf; } + +/** + * tipc_node_get_linkname - get the name of a link + * + * @bearer_id: id of the bearer + * @node: peer node address + * @linkname: link name output buffer + * + * Returns 0 on success + */ +int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +{ + struct tipc_link *link; + struct tipc_node *node = tipc_node_find(addr); + + if ((bearer_id > MAX_BEARERS) || !node) + return -EINVAL; + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) { + strncpy(linkname, link->name, len); + tipc_node_unlock(node); + return 0; + } + tipc_node_unlock(node); + return -EINVAL; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 7cbb8cec1a93..411b19114064 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -118,6 +118,7 @@ int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); +int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); static inline void tipc_node_lock(struct tipc_node *n_ptr) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3c0256962f7d..3f9912f87d0d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,6 +36,7 @@ #include "core.h" #include "port.h" +#include "node.h" #include @@ -1905,6 +1906,28 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } +int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +{ + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + break; + default: + return -ENOIOCTLCMD; + } +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -1917,7 +1940,7 @@ static const struct proto_ops msg_ops = { .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1938,7 +1961,7 @@ static const struct proto_ops packet_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1959,7 +1982,7 @@ static const struct proto_ops stream_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, -- cgit v1.2.3 From 22e7987ae7d8d13beeaf0717215800f7e803ddcf Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 25 Apr 2014 10:44:15 +0800 Subject: tipc: fix a possible memory leak The commit a8b9b96e959f3c035af20b1bd2ba67b0b7269b19 ("tipc: fix race in disc create/delete") leads to the following static checker warning: net/tipc/discover.c:352 tipc_disc_create() warn: possible memory leak of 'req' The risk of memory leak really exists in practice. Especially when it's failed to allocate memory for "req->buf", tipc_disc_create() doesn't free its allocated memory, instead just directly returns with ENOMEM error code. In this situation, memory leak, of course, happens. Reported-by: Dan Carpenter Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/discover.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index ada42e436f5e..bd35c4a0746f 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -348,8 +348,10 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) return -ENOMEM; req->buf = tipc_buf_acquire(INT_H_SIZE); - if (!req->buf) + if (!req->buf) { + kfree(req); return -ENOMEM; + } tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); -- cgit v1.2.3 From d7bb74c38cb3de40600dcbba50a4f84df290dc91 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 28 Apr 2014 08:20:09 +0200 Subject: tipc: fix out of bounds indexing Commit 78acb1f9b898e85fa2c1e28e700b54b66b288e8d ("tipc: add ioctl to fetch link names") introduced a buffer overflow bug where specially crafted ioctl requests could cause out-of-bounds indexing of the node->links array. This was caused by an incorrect check vs MAX_BEARERS, and the static code checker complaint is: net/tipc/node.c:459 tipc_node_get_linkname() error: buffer overflow 'node->links' 2 <= 2 Signed-off-by: Erik Hugne Reported-by: Dan Carpenter Signed-off-by: David S. Miller --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 1f938f3dba4b..6d6543e88c2c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -453,7 +453,7 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) struct tipc_link *link; struct tipc_node *node = tipc_node_find(addr); - if ((bearer_id > MAX_BEARERS) || !node) + if ((bearer_id >= MAX_BEARERS) || !node) return -EINVAL; tipc_node_lock(node); link = node->links[bearer_id]; -- cgit v1.2.3 From eab8c045732635e3833a5d58b17c6da08ff71f9e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 28 Apr 2014 18:00:10 +0800 Subject: tipc: move the delivery of named messages out of nametbl lock Commit a89778d8baf19cd7e728d81121a294a06cedaad1 ("tipc: add support for link state subscriptions") introduced below possible deadlock scenario: CPU0 CPU1 T0: tipc_publish() link_timeout() T1: tipc_nametbl_publish() [grab node lock]* T2: [grab nametbl write lock]* link_state_event() T3: named_cluster_distribute() link_activate() T4: [grab node lock]* tipc_node_link_up() T5: tipc_nametbl_publish() T6: [grab nametble write lock]* The opposite order of holding nametbl write lock and node lock on above two different paths may result in a deadlock. If we move the the delivery of named messages via link out of name nametbl lock, the reverse order of holding locks will be eliminated, as a result, the deadlock will be killed as well. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 18 +++++++++--------- net/tipc/name_distr.h | 5 +++-- net/tipc/name_table.c | 12 ++++++++++-- 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 36a72822601c..974a73f3d876 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -127,7 +127,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) return buf; } -static void named_cluster_distribute(struct sk_buff *buf) +void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; @@ -156,7 +156,7 @@ static void named_cluster_distribute(struct sk_buff *buf) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -void tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -165,23 +165,23 @@ void tipc_named_publish(struct publication *publ) publ_lists[publ->scope]->size++; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -void tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -190,17 +190,17 @@ void tipc_named_withdraw(struct publication *publ) publ_lists[publ->scope]->size--; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } /* diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 9b312ccfd43e..47ff829f9361 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -39,8 +39,9 @@ #include "name_table.h" -void tipc_named_publish(struct publication *publ); -void tipc_named_withdraw(struct publication *publ); +struct sk_buff *tipc_named_publish(struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct publication *publ); +void named_cluster_distribute(struct sk_buff *buf); void tipc_named_node_up(unsigned long node); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 042e8e3cabc0..9bcf4b58853f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -664,6 +664,7 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key) { struct publication *publ; + struct sk_buff *buf = NULL; if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", @@ -676,9 +677,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, tipc_own_addr, port_ref, key); if (likely(publ)) { table.local_publ_count++; - tipc_named_publish(publ); + buf = tipc_named_publish(publ); } write_unlock_bh(&tipc_nametbl_lock); + + if (buf) + named_cluster_distribute(buf); return publ; } @@ -688,15 +692,19 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; + struct sk_buff *buf; write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); if (likely(publ)) { table.local_publ_count--; - tipc_named_withdraw(publ); + buf = tipc_named_withdraw(publ); write_unlock_bh(&tipc_nametbl_lock); list_del_init(&publ->pport_list); kfree(publ); + + if (buf) + named_cluster_distribute(buf); return 1; } write_unlock_bh(&tipc_nametbl_lock); -- cgit v1.2.3 From 1621b94d2a655c8548ddbdfc8ccf907a5bbdc860 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 29 Apr 2014 11:12:18 +0800 Subject: tipc: fix memory leak of publications Commit 1bb8dce57f4d15233688c68990852a10eb1cd79f ("tipc: fix memory leak during module removal") introduced a memory leak issue: when name table is stopped, it's forgotten that publication instances are freed properly. Additionally the useless "continue" statement in tipc_nametbl_stop() is removed as well. Reported-by: Jason Signed-off-by: Ying Xue Acked-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/name_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9bcf4b58853f..9d7d37d95187 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -969,6 +969,7 @@ static void tipc_purge_publications(struct name_seq *seq) list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + kfree(publ); } } @@ -990,7 +991,6 @@ void tipc_nametbl_stop(void) hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { tipc_purge_publications(seq); } - continue; } kfree(table.types); table.types = NULL; -- cgit v1.2.3 From 5356f3d7d48af72eb2a14b643d5563f068c44fe0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:09 +0800 Subject: tipc: always use tipc_node_lock() to hold node lock Although we obtain node lock with tipc_node_lock() in most time, there are still places where we directly use native spin lock interface to grab node lock. But as we will do more jobs in the future when node lock is released, we should ensure that tipc_node_lock() is always called when node lock is taken. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 12 ++++++------ net/tipc/name_distr.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index c723ee90219d..3a801452643d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -297,14 +297,14 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { tipc_link_reset(l_ptr); if (shutting_down || !tipc_node_is_up(n_ptr)) { tipc_node_detach_link(l_ptr->owner, l_ptr); tipc_link_reset_fragments(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); /* Nobody else can access this link now: */ del_timer_sync(&l_ptr->timer); @@ -312,12 +312,12 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) } else { /* Detach/delete when failover is finished: */ l_ptr->flags |= LINK_STOPPED; - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); del_timer_sync(&l_ptr->timer); } continue; } - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } @@ -474,11 +474,11 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 974a73f3d876..8465263246c3 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -135,18 +135,18 @@ void named_cluster_distribute(struct sk_buff *buf) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[n_ptr->addr & 1]; if (l_ptr) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) { - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); break; } msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); __tipc_link_xmit(l_ptr, buf_copy); } - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); -- cgit v1.2.3 From 486f930ac546914550b84abbc227867cc1be1f95 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:10 +0800 Subject: tipc: adjust order of variables in tipc_node structure Move more frequently used variables up to the head of tipc_node structure, hopefully improving a bit performance. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.h | 63 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.h b/net/tipc/node.h index 411b19114064..bb7f708ce19b 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -52,57 +52,62 @@ #define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ #define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ +/** + * struct tipc_node_bclink - TIPC node bclink structure + * @acked: sequence # of last outbound b'cast message acknowledged by node + * @last_in: sequence # of last in-sequence b'cast message received from node + * @last_sent: sequence # of last b'cast message sent by node + * @oos_state: state tracker for handling OOS b'cast messages + * @deferred_size: number of OOS b'cast messages in deferred queue + * @deferred_head: oldest OOS b'cast message received from node + * @deferred_tail: newest OOS b'cast message received from node + * @reasm_head: broadcast reassembly queue head from node + * @reasm_tail: last broadcast fragment received from node + * @recv_permitted: true if node is allowed to receive b'cast messages + */ +struct tipc_node_bclink { + u32 acked; + u32 last_in; + u32 last_sent; + u32 oos_state; + u32 deferred_size; + struct sk_buff *deferred_head; + struct sk_buff *deferred_tail; + struct sk_buff *reasm_head; + struct sk_buff *reasm_tail; + bool recv_permitted; +}; + /** * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure * @hash: links to adjacent nodes in unsorted hash chain - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node - * @working_links: number of working links to node (both active and standby) * @block_setup: bit mask of conditions preventing link establishment to node + * @bclink: broadcast-related info + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @signature: node instance identifier - * @bclink: broadcast-related info + * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_size: number of OOS b'cast messages in deferred queue - * @deferred_head: oldest OOS b'cast message received from node - * @deferred_tail: newest OOS b'cast message received from node - * @reasm_head: broadcast reassembly queue head from node - * @reasm_tail: last broadcast fragment received from node - * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node { u32 addr; spinlock_t lock; struct hlist_node hash; - struct list_head list; - struct list_head nsub; struct tipc_link *active_links[2]; struct tipc_link *links[MAX_BEARERS]; + int block_setup; + struct tipc_node_bclink bclink; + struct list_head list; int link_cnt; int working_links; - int block_setup; u32 signature; + struct list_head nsub; struct rcu_head rcu; - struct { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff *deferred_head; - struct sk_buff *deferred_tail; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; - bool recv_permitted; - } bclink; }; extern struct list_head tipc_node_list; -- cgit v1.2.3 From 10f465c4966fbc8f50a59480d37a3451f6f3d564 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:11 +0800 Subject: tipc: rename setup_blocked variable of node struct to flags Rename setup_blocked variable of node struct to a more common name called "flags", which will be used to represent kinds of node states. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 20 ++++++++++---------- net/tipc/node.c | 6 +++--- net/tipc/node.h | 24 ++++++++++++++++++------ 3 files changed, 31 insertions(+), 19 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 3a801452643d..ac074aaf104d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1495,14 +1495,14 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) goto unlock_discard; /* Verify that communication with node is currently allowed */ - if ((n_ptr->block_setup & WAIT_PEER_DOWN) && - msg_user(msg) == LINK_PROTOCOL && - (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && - !msg_redundant_link(msg)) - n_ptr->block_setup &= ~WAIT_PEER_DOWN; - - if (n_ptr->block_setup) + if ((n_ptr->flags & TIPC_NODE_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->flags &= ~TIPC_NODE_DOWN; + + if (tipc_node_blocked(n_ptr)) goto unlock_discard; /* Validate message sequence number info */ @@ -1744,7 +1744,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, return; /* Abort non-RESET send if communication with node is prohibited */ - if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) + if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG)) return; /* Create protocol message with "out-of-sequence" sequence number */ @@ -1859,7 +1859,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) * peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ - l_ptr->owner->block_setup = WAIT_NODE_DOWN; + l_ptr->owner->flags = TIPC_NODE_RESET; } link_state_event(l_ptr, RESET_MSG); diff --git a/net/tipc/node.c b/net/tipc/node.c index 6d6543e88c2c..2b0a0849d4cc 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -108,7 +108,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->block_setup = WAIT_PEER_DOWN; + n_ptr->flags = TIPC_NODE_DOWN; n_ptr->signature = INVALID_NODE_SIG; tipc_num_nodes++; @@ -280,7 +280,7 @@ static void node_name_purge_complete(unsigned long node_addr) n_ptr = tipc_node_find(node_addr); if (n_ptr) { tipc_node_lock(n_ptr); - n_ptr->block_setup &= ~WAIT_NAMES_GONE; + n_ptr->flags &= ~TIPC_NAMES_GONE; tipc_node_unlock(n_ptr); } } @@ -324,7 +324,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_nodesub_notify(n_ptr); /* Prevent re-contact with node until cleanup is done */ - n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; + n_ptr->flags = TIPC_NODE_DOWN | TIPC_NAMES_GONE; tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index bb7f708ce19b..242b918ddd14 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -47,10 +47,16 @@ */ #define INVALID_NODE_SIG 0x10000 -/* Flags used to block (re)establishment of contact with a neighboring node */ -#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ -#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ -#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ +/* Flags used to block (re)establishment of contact with a neighboring node + * TIPC_NODE_DOWN: indicate node is down + * TIPC_NAMES_GONE: indicate the node's publications are purged + * TIPC_NODE_RESET: indicate node is reset + */ +enum { + TIPC_NODE_DOWN = (1 << 1), + TIPC_NAMES_GONE = (1 << 2), + TIPC_NODE_RESET = (1 << 3) +}; /** * struct tipc_node_bclink - TIPC node bclink structure @@ -85,7 +91,7 @@ struct tipc_node_bclink { * @hash: links to adjacent nodes in unsorted hash chain * @active_links: pointers to active links to node * @links: pointers to all links to node - * @block_setup: bit mask of conditions preventing link establishment to node + * @flags: bit mask of conditions preventing link establishment to node * @bclink: broadcast-related info * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) @@ -100,7 +106,7 @@ struct tipc_node { struct hlist_node hash; struct tipc_link *active_links[2]; struct tipc_link *links[MAX_BEARERS]; - int block_setup; + unsigned int flags; struct tipc_node_bclink bclink; struct list_head list; int link_cnt; @@ -135,4 +141,10 @@ static inline void tipc_node_unlock(struct tipc_node *n_ptr) spin_unlock_bh(&n_ptr->lock); } +static inline bool tipc_node_blocked(struct tipc_node *node) +{ + return (node->flags & (TIPC_NODE_DOWN | TIPC_NAMES_GONE | + TIPC_NODE_RESET)); +} + #endif -- cgit v1.2.3 From 9db9fdd1983eb960182d72f95d77b91b3a5173d0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:12 +0800 Subject: tipc: avoid to asynchronously notify subscriptions Postpone the actions of notifying subscriptions until after node lock is released, avoiding to asynchronously execute registered handlers when node is lost. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 23 +++++++++++++++++++++-- net/tipc/node.h | 15 +++++++-------- net/tipc/node_subscr.c | 9 ++++----- net/tipc/node_subscr.h | 2 +- 4 files changed, 33 insertions(+), 16 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 2b0a0849d4cc..befbcc9eade6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -321,10 +321,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) } /* Notify subscribers */ - tipc_nodesub_notify(n_ptr); + n_ptr->flags = TIPC_NODE_LOST; /* Prevent re-contact with node until cleanup is done */ - n_ptr->flags = TIPC_NODE_DOWN | TIPC_NAMES_GONE; + n_ptr->flags |= TIPC_NODE_DOWN | TIPC_NAMES_GONE; tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); } @@ -465,3 +465,22 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) tipc_node_unlock(node); return -EINVAL; } + +void tipc_node_unlock(struct tipc_node *node) +{ + LIST_HEAD(nsub_list); + + if (likely(!node->flags)) { + spin_unlock_bh(&node->lock); + return; + } + + if (node->flags & TIPC_NODE_LOST) { + list_replace_init(&node->nsub, &nsub_list); + node->flags &= ~TIPC_NODE_LOST; + } + spin_unlock_bh(&node->lock); + + if (!list_empty(&nsub_list)) + tipc_nodesub_notify(&nsub_list); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 242b918ddd14..fd86726ed192 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,11 +51,14 @@ * TIPC_NODE_DOWN: indicate node is down * TIPC_NAMES_GONE: indicate the node's publications are purged * TIPC_NODE_RESET: indicate node is reset + * TIPC_NODE_LOST: indicate node is lost and it's used to notify subscriptions + * when node lock is released */ enum { TIPC_NODE_DOWN = (1 << 1), TIPC_NAMES_GONE = (1 << 2), - TIPC_NODE_RESET = (1 << 3) + TIPC_NODE_RESET = (1 << 3), + TIPC_NODE_LOST = (1 << 4) }; /** @@ -130,15 +133,11 @@ int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +void tipc_node_unlock(struct tipc_node *node); -static inline void tipc_node_lock(struct tipc_node *n_ptr) +static inline void tipc_node_lock(struct tipc_node *node) { - spin_lock_bh(&n_ptr->lock); -} - -static inline void tipc_node_unlock(struct tipc_node *n_ptr) -{ - spin_unlock_bh(&n_ptr->lock); + spin_lock_bh(&node->lock); } static inline bool tipc_node_blocked(struct tipc_node *node) diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 8a7384c04add..7c59ab1d6ecb 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -81,14 +81,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) * * Note: node is locked by caller */ -void tipc_nodesub_notify(struct tipc_node *node) +void tipc_nodesub_notify(struct list_head *nsub_list) { - struct tipc_node_subscr *ns; + struct tipc_node_subscr *ns, *safe; - list_for_each_entry(ns, &node->nsub, nodesub_list) { + list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { if (ns->handle_node_down) { - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); + ns->handle_node_down(ns->usr_handle); ns->handle_node_down = NULL; } } diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index c95d20727ded..d91b8cc81e3d 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -58,6 +58,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); -void tipc_nodesub_notify(struct tipc_node *node); +void tipc_nodesub_notify(struct list_head *nsub_list); #endif -- cgit v1.2.3 From 9d561949685749be3d97239eab7d85aa78718108 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:13 +0800 Subject: tipc: remove TIPC_NAMES_GONE node flag Since previously what all publications pertaining to the lost node were removed from name table was finished in tasklet context asynchronously, we need to TIPC_NAMES_GONE flag indicating whether the node cleanup work is finished or not. But now as the cleanup work has been finished when node lock is released, the flag becomes meaningless for us. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 22 ++++------------------ net/tipc/node.h | 8 +++----- 2 files changed, 7 insertions(+), 23 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index befbcc9eade6..c3a36bba9952 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -273,18 +273,6 @@ static void node_established_contact(struct tipc_node *n_ptr) tipc_bclink_add_node(n_ptr->addr); } -static void node_name_purge_complete(unsigned long node_addr) -{ - struct tipc_node *n_ptr; - - n_ptr = tipc_node_find(node_addr); - if (n_ptr) { - tipc_node_lock(n_ptr); - n_ptr->flags &= ~TIPC_NAMES_GONE; - tipc_node_unlock(n_ptr); - } -} - static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; @@ -320,12 +308,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_link_reset_fragments(l_ptr); } - /* Notify subscribers */ - n_ptr->flags = TIPC_NODE_LOST; - - /* Prevent re-contact with node until cleanup is done */ - n_ptr->flags |= TIPC_NODE_DOWN | TIPC_NAMES_GONE; - tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); + /* Notify subscribers and prevent re-contact with node until + * cleanup is done. + */ + n_ptr->flags = TIPC_NODE_DOWN | TIPC_NODE_LOST; } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) diff --git a/net/tipc/node.h b/net/tipc/node.h index fd86726ed192..4bd5eff82ce0 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -49,16 +49,14 @@ /* Flags used to block (re)establishment of contact with a neighboring node * TIPC_NODE_DOWN: indicate node is down - * TIPC_NAMES_GONE: indicate the node's publications are purged * TIPC_NODE_RESET: indicate node is reset * TIPC_NODE_LOST: indicate node is lost and it's used to notify subscriptions * when node lock is released */ enum { TIPC_NODE_DOWN = (1 << 1), - TIPC_NAMES_GONE = (1 << 2), - TIPC_NODE_RESET = (1 << 3), - TIPC_NODE_LOST = (1 << 4) + TIPC_NODE_RESET = (1 << 2), + TIPC_NODE_LOST = (1 << 3) }; /** @@ -142,7 +140,7 @@ static inline void tipc_node_lock(struct tipc_node *node) static inline bool tipc_node_blocked(struct tipc_node *node) { - return (node->flags & (TIPC_NODE_DOWN | TIPC_NAMES_GONE | + return (node->flags & (TIPC_NODE_DOWN | TIPC_NODE_LOST | TIPC_NODE_RESET)); } -- cgit v1.2.3 From ca0c42732c512a12fabe677594840f31861dd31a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:14 +0800 Subject: tipc: avoid to asynchronously deliver name tables to peer node Postpone the actions of delivering name tables until after node lock is released, avoiding to do it under asynchronous context. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 52 ++------------------------------------------------- net/tipc/name_distr.h | 30 ++++++++++++++++++++++++++++- net/tipc/node.c | 16 +++++++++++++++- net/tipc/node.h | 8 ++++++-- 4 files changed, 52 insertions(+), 54 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 8465263246c3..8ce730984aa1 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -38,34 +38,6 @@ #include "link.h" #include "name_distr.h" -#define ITEM_SIZE sizeof(struct distr_item) - -/** - * struct distr_item - publication info distributed to other nodes - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @ref: publishing port reference - * @key: publication key - * - * ===> All fields are stored in network byte order. <=== - * - * First 3 fields identify (name or) name sequence being published. - * Reference field uniquely identifies port that published name sequence. - * Key field uniquely identifies publication, in the event a port has - * multiple publications of the same name sequence. - * - * Note: There is no field that identifies the publishing node because it is - * the same for all items contained within a publication message. - */ -struct distr_item { - __be32 type; - __be32 lower; - __be32 upper; - __be32 ref; - __be32 key; -}; - /** * struct publ_list - list of publications made by this node * @list: circular list of publications @@ -239,29 +211,9 @@ static void named_distribute(struct list_head *message_list, u32 node, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(unsigned long nodearg) +void tipc_named_node_up(u32 max_item_buf, u32 node) { - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct list_head message_list; - u32 node = (u32)nodearg; - u32 max_item_buf = 0; - - /* compute maximum amount of publication data to send per message */ - n_ptr = tipc_node_find(node); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) - max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / - ITEM_SIZE) * ITEM_SIZE; - tipc_node_unlock(n_ptr); - } - if (!max_item_buf) - return; - - /* create list of publication messages, then send them as a unit */ - INIT_LIST_HEAD(&message_list); + LIST_HEAD(message_list); read_lock_bh(&tipc_nametbl_lock); named_distribute(&message_list, node, &publ_cluster, max_item_buf); diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 47ff829f9361..b2eed4ec1526 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -39,10 +39,38 @@ #include "name_table.h" +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @ref: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Reference field uniquely identifies port that published name sequence. + * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message. + */ +struct distr_item { + __be32 type; + __be32 lower; + __be32 upper; + __be32 ref; + __be32 key; +}; + struct sk_buff *tipc_named_publish(struct publication *publ); struct sk_buff *tipc_named_withdraw(struct publication *publ); void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(unsigned long node); +void tipc_named_node_up(u32 max_item_buf, u32 node); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index c3a36bba9952..74efebc1cb7a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -267,7 +267,7 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) static void node_established_contact(struct tipc_node *n_ptr) { - tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); + n_ptr->flags |= TIPC_NODE_UP; n_ptr->bclink.oos_state = 0; n_ptr->bclink.acked = tipc_bclink_get_last_sent(); tipc_bclink_add_node(n_ptr->addr); @@ -455,6 +455,9 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { LIST_HEAD(nsub_list); + struct tipc_link *link; + int pkt_sz = 0; + u32 addr = 0; if (likely(!node->flags)) { spin_unlock_bh(&node->lock); @@ -465,8 +468,19 @@ void tipc_node_unlock(struct tipc_node *node) list_replace_init(&node->nsub, &nsub_list); node->flags &= ~TIPC_NODE_LOST; } + if (node->flags & TIPC_NODE_UP) { + link = node->active_links[0]; + node->flags &= ~TIPC_NODE_UP; + if (link) { + pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) * + ITEM_SIZE; + addr = node->addr; + } + } spin_unlock_bh(&node->lock); if (!list_empty(&nsub_list)) tipc_nodesub_notify(&nsub_list); + if (pkt_sz) + tipc_named_node_up(pkt_sz, addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4bd5eff82ce0..38f710fb75dc 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -48,15 +48,19 @@ #define INVALID_NODE_SIG 0x10000 /* Flags used to block (re)establishment of contact with a neighboring node - * TIPC_NODE_DOWN: indicate node is down + * TIPC_NODE_DOWN: indicate node is down and it's used to block the node's + * links until RESET or ACTIVE message arrives * TIPC_NODE_RESET: indicate node is reset * TIPC_NODE_LOST: indicate node is lost and it's used to notify subscriptions * when node lock is released + * TIPC_NODE_UP: indicate node is up and it's used to deliver local name table + * when node lock is released */ enum { TIPC_NODE_DOWN = (1 << 1), TIPC_NODE_RESET = (1 << 2), - TIPC_NODE_LOST = (1 << 3) + TIPC_NODE_LOST = (1 << 3), + TIPC_NODE_UP = (1 << 4) }; /** -- cgit v1.2.3 From d69afc90b8d47e471d2870f090f662e569b08407 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:15 +0800 Subject: tipc: define new functions to operate bc_lock As we are going to do more jobs when bc_lock is released, the two operations of holding/releasing the lock should be encapsulated with functions. In addition, we move bc_lock spin lock into tipc_bclink structure avoiding to define the global variable. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 96 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 43 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 119a59b4bec6..9eceaa72f21b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -71,7 +71,7 @@ struct tipc_bcbearer_pair { * Note: The fields labelled "temporary" are incorporated into the bearer * to avoid consuming potentially limited stack space through the use of * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bc_lock". + * prevented through use of the spinlock "bclink_lock". */ struct tipc_bcbearer { struct tipc_bearer bearer; @@ -84,6 +84,7 @@ struct tipc_bcbearer { /** * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node * @bcast_nodes: map of broadcast-capable nodes @@ -92,6 +93,7 @@ struct tipc_bcbearer { * Handles sequence numbering, fragmentation, bundling, etc. */ struct tipc_bclink { + spinlock_t lock; struct tipc_link link; struct tipc_node node; struct tipc_node_map bcast_nodes; @@ -105,8 +107,6 @@ static struct tipc_bcbearer *bcbearer = &bcast_bearer; static struct tipc_bclink *bclink = &bcast_link; static struct tipc_link *bcl = &bcast_link.link; -static DEFINE_SPINLOCK(bc_lock); - const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, @@ -115,6 +115,16 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_bclink_lock(void) +{ + spin_lock_bh(&bclink->lock); +} + +static void tipc_bclink_unlock(void) +{ + spin_unlock_bh(&bclink->lock); +} + static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -132,16 +142,16 @@ static void bcbuf_decr_acks(struct sk_buff *buf) void tipc_bclink_add_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_add(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } void tipc_bclink_remove_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_remove(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } static void bclink_set_last_sent(void) @@ -167,7 +177,7 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * - * Called with bc_lock locked + * Called with bclink_lock locked */ struct tipc_node *tipc_bclink_retransmit_to(void) { @@ -179,7 +189,7 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit * - * Called with bc_lock locked + * Called with bclink_lock locked */ static void bclink_retransmit_pkt(u32 after, u32 to) { @@ -196,7 +206,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * @n_ptr: node that sent acknowledgement info * @acked: broadcast sequence # that has been acknowledged * - * Node is locked, bc_lock unlocked. + * Node is locked, bclink_lock unlocked. */ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { @@ -204,8 +214,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *next; unsigned int released = 0; - spin_lock_bh(&bc_lock); - + tipc_bclink_lock(); /* Bail out if tx queue is empty (no clean up is required) */ crs = bcl->first_out; if (!crs) @@ -269,7 +278,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) if (unlikely(released && !list_empty(&bcl->waiting_ports))) tipc_link_wakeup_ports(bcl, 0); exit: - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } /** @@ -322,10 +331,10 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) ? buf_seqno(n_ptr->bclink.deferred_head) - 1 : n_ptr->bclink.last_sent); - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_bearer_send(MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -362,7 +371,7 @@ int tipc_bclink_xmit(struct sk_buff *buf) { int res; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (!bclink->bcast_nodes.count) { res = msg_data_sz(buf_msg(buf)); @@ -377,14 +386,14 @@ int tipc_bclink_xmit(struct sk_buff *buf) bcl->stats.accu_queue_sz += bcl->out_queue_size; } exit: - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return res; } /** * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet * - * Called with both sending node's lock and bc_lock taken. + * Called with both sending node's lock and bclink_lock taken. */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { @@ -439,12 +448,12 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (msg_destnode(msg) == tipc_own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bcl->stats.recv_nacks++; bclink->retransmit_to = node; bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } else { tipc_node_unlock(node); bclink_peek_nack(msg); @@ -462,20 +471,20 @@ receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) tipc_port_mcast_rcv(buf, NULL); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { @@ -485,28 +494,28 @@ receive: &buf); if (ret == LINK_REASM_ERROR) goto unlock; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (ret == LINK_REASM_COMPLETE) { bcl->stats.recv_fragmented++; /* Point msg to inner header */ msg = buf_msg(buf); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); goto receive; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_named_rcv(buf); } else { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); kfree_skb(buf); } @@ -552,14 +561,14 @@ receive: } else deferred = 0; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); unlock: tipc_node_unlock(node); @@ -663,7 +672,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) int b_index; int pri; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (action) tipc_nmap_add(nm_ptr, node); @@ -710,7 +719,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) bp_curr++; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } @@ -722,7 +731,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) if (!bcl) return 0; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); s = &bcl->stats; @@ -751,7 +760,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return ret; } @@ -760,9 +769,9 @@ int tipc_bclink_reset_stats(void) if (!bcl) return -ENOPROTOOPT; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); memset(&bcl->stats, 0, sizeof(bcl->stats)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } @@ -773,9 +782,9 @@ int tipc_bclink_set_queue_limits(u32 limit) if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_set_queue_limits(bcl, limit); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } @@ -785,6 +794,7 @@ void tipc_bclink_init(void) bcbearer->media.send_msg = tipc_bcbearer_send; sprintf(bcbearer->media.name, "tipc-broadcast"); + spin_lock_init(&bclink->lock); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); @@ -799,9 +809,9 @@ void tipc_bclink_init(void) void tipc_bclink_stop(void) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_purge_queues(bcl); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); memset(bclink, 0, sizeof(*bclink)); -- cgit v1.2.3 From eb8b00f5f248c50603bca383792ac3a618297be0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:16 +0800 Subject: tipc: convert allocations of global variables associated with bclink Convert allocations of global variables associated with bclink from static way to dynamical way for the convenience of bclink instance initialisation. Meanwhile, this also helps TIPC support name space in the future easily. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 28 +++++++++++++++++++--------- net/tipc/bcast.h | 2 +- net/tipc/config.c | 6 ++++-- net/tipc/net.c | 9 +++++++-- net/tipc/net.h | 2 +- 5 files changed, 32 insertions(+), 15 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9eceaa72f21b..ef8cff4ad743 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -100,12 +100,9 @@ struct tipc_bclink { struct tipc_node *retransmit_to; }; -static struct tipc_bcbearer bcast_bearer; -static struct tipc_bclink bcast_link; - -static struct tipc_bcbearer *bcbearer = &bcast_bearer; -static struct tipc_bclink *bclink = &bcast_link; -static struct tipc_link *bcl = &bcast_link.link; +static struct tipc_bcbearer *bcbearer; +static struct tipc_bclink *bclink; +static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -788,8 +785,19 @@ int tipc_bclink_set_queue_limits(u32 limit) return 0; } -void tipc_bclink_init(void) +int tipc_bclink_init(void) { + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); + if (!bcbearer) + return -ENOMEM; + + bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); + if (!bclink) { + kfree(bcbearer); + return -ENOMEM; + } + + bcl = &bclink->link; bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; sprintf(bcbearer->media.name, "tipc-broadcast"); @@ -805,6 +813,7 @@ void tipc_bclink_init(void) rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + return 0; } void tipc_bclink_stop(void) @@ -814,8 +823,9 @@ void tipc_bclink_stop(void) tipc_bclink_unlock(); RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); - memset(bclink, 0, sizeof(*bclink)); - memset(bcbearer, 0, sizeof(*bcbearer)); + synchronize_net(); + kfree(bcbearer); + kfree(bclink); } /** diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 7c1ef1b3d7b3..ea162c7b30ee 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -81,7 +81,7 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); void tipc_port_list_free(struct tipc_port_list *pl_ptr); -void tipc_bclink_init(void); +int tipc_bclink_init(void); void tipc_bclink_stop(void); void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); diff --git a/net/tipc/config.c b/net/tipc/config.c index 251f5a2028e4..2b42403ad33a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -177,8 +177,10 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_net_start(addr); - return tipc_cfg_reply_none(); + if (!tipc_net_start(addr)) + return tipc_cfg_reply_none(); + + return tipc_cfg_reply_error_string("cannot change to network mode"); } static struct sk_buff *cfg_set_max_ports(void) diff --git a/net/tipc/net.c b/net/tipc/net.c index 75bb39025d53..f8fc95d58c0d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -164,20 +164,25 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_xmit(buf, dnode, msg_link_selector(msg)); } -void tipc_net_start(u32 addr) +int tipc_net_start(u32 addr) { char addr_string[16]; + int res; tipc_own_addr = addr; tipc_named_reinit(); tipc_port_reinit(); - tipc_bclink_init(); + res = tipc_bclink_init(); + if (res) + return res; + tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index f781cae8df4b..c6c2b46f7c28 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,7 +39,7 @@ void tipc_net_route_msg(struct sk_buff *buf); -void tipc_net_start(u32 addr); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif -- cgit v1.2.3 From 3f5a12bd9f9a61d8a12f9adf778b14e4bb8ca050 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:17 +0800 Subject: tipc: avoid to asynchronously reset all links Postpone the actions of resetting all links until after bclink lock is released, avoiding to asynchronously reset all links. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 21 +++++++++++++++++++++ net/tipc/bcast.h | 2 ++ net/tipc/link.c | 22 ++++++++-------------- net/tipc/link.h | 1 + 4 files changed, 32 insertions(+), 14 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ef8cff4ad743..a0978d0890cb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -87,6 +87,7 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,6 +97,7 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; + unsigned int flags; struct tipc_node_map bcast_nodes; struct tipc_node *retransmit_to; }; @@ -119,7 +121,26 @@ static void tipc_bclink_lock(void) static void tipc_bclink_unlock(void) { + struct tipc_node *node = NULL; + + if (likely(!bclink->flags)) { + spin_unlock_bh(&bclink->lock); + return; + } + + if (bclink->flags & TIPC_BCLINK_RESET) { + bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(); + } spin_unlock_bh(&bclink->lock); + + if (node) + tipc_link_reset_all(node); +} + +void tipc_bclink_set_flags(unsigned int flags) +{ + bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index ea162c7b30ee..00330c45df3e 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -39,6 +39,7 @@ #define MAX_NODES 4096 #define WSIZE 32 +#define TIPC_BCLINK_RESET 1 /** * struct tipc_node_map - set of node identifiers @@ -83,6 +84,7 @@ void tipc_port_list_free(struct tipc_port_list *pl_ptr); int tipc_bclink_init(void); void tipc_bclink_stop(void); +void tipc_bclink_set_flags(unsigned int flags); void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); diff --git a/net/tipc/link.c b/net/tipc/link.c index ac074aaf104d..dce2bef81720 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1259,29 +1259,24 @@ void tipc_link_push_queue(struct tipc_link *l_ptr) } while (!res); } -static void link_reset_all(unsigned long addr) +void tipc_link_reset_all(struct tipc_node *node) { - struct tipc_node *n_ptr; char addr_string[16]; u32 i; - n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) - return; /* node no longer exists */ - - tipc_node_lock(n_ptr); + tipc_node_lock(node); pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, node->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (n_ptr->links[i]) { - link_print(n_ptr->links[i], "Resetting link\n"); - tipc_link_reset(n_ptr->links[i]); + if (node->links[i]) { + link_print(node->links[i], "Resetting link\n"); + tipc_link_reset(node->links[i]); } } - tipc_node_unlock(n_ptr); + tipc_node_unlock(node); } static void link_retransmit_failure(struct tipc_link *l_ptr, @@ -1318,10 +1313,9 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); - tipc_node_unlock(n_ptr); + tipc_bclink_set_flags(TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } diff --git a/net/tipc/link.h b/net/tipc/link.h index 4b556c181bae..7ba73fa6b81e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -230,6 +230,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); -- cgit v1.2.3 From 52ff872055e06af10f94b8853c946f07ed8a0672 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 5 May 2014 08:56:18 +0800 Subject: tipc: purge signal handler infrastructure In the previous commits of this series, we removed all asynchronous actions which were based on the tasklet handler - "tipc_k_signal()". So the moment has now come when we can completely remove the tasklet handler infrastructure. That is done with this commit. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/Makefile | 2 +- net/tipc/core.c | 7 --- net/tipc/core.h | 6 +-- net/tipc/handler.c | 134 ----------------------------------------------------- 4 files changed, 2 insertions(+), 147 deletions(-) delete mode 100644 net/tipc/handler.c (limited to 'net/tipc') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index b282f7130d2b..a080c66d819a 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o config.o \ - core.o handler.o link.o discover.o msg.o \ + core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ socket.o log.o eth_media.o server.o diff --git a/net/tipc/core.c b/net/tipc/core.c index 50d57429ebca..57f8ae9aa466 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -80,7 +80,6 @@ struct sk_buff *tipc_buf_acquire(u32 size) */ static void tipc_core_stop(void) { - tipc_handler_stop(); tipc_net_stop(); tipc_bearer_cleanup(); tipc_netlink_stop(); @@ -100,10 +99,6 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = tipc_handler_start(); - if (err) - goto out_handler; - err = tipc_ref_table_init(tipc_max_ports, tipc_random); if (err) goto out_reftbl; @@ -146,8 +141,6 @@ out_netlink: out_nametbl: tipc_ref_table_stop(); out_reftbl: - tipc_handler_stop(); -out_handler: return err; } diff --git a/net/tipc/core.h b/net/tipc/core.h index 36cbf158845f..ae55d37267e6 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -89,8 +89,6 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -int tipc_handler_start(void); -void tipc_handler_stop(void); int tipc_netlink_start(void); void tipc_netlink_stop(void); int tipc_socket_init(void); @@ -109,12 +107,10 @@ void tipc_unregister_sysctl(void); #endif /* - * TIPC timer and signal code + * TIPC timer code */ typedef void (*Handler) (unsigned long); -u32 tipc_k_signal(Handler routine, unsigned long argument); - /** * k_init_timer - initialize a timer * @timer: pointer to timer structure diff --git a/net/tipc/handler.c b/net/tipc/handler.c deleted file mode 100644 index 1fabf160501f..000000000000 --- a/net/tipc/handler.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * net/tipc/handler.c: TIPC signal handling - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" - -struct queue_item { - struct list_head next_signal; - void (*handler) (unsigned long); - unsigned long data; -}; - -static struct kmem_cache *tipc_queue_item_cache; -static struct list_head signal_queue_head; -static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled __read_mostly; - -static void process_signal_queue(unsigned long dummy); - -static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0); - - -unsigned int tipc_k_signal(Handler routine, unsigned long argument) -{ - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return -ENOPROTOOPT; - } - - item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); - if (!item) { - pr_err("Signal queue out of memory\n"); - spin_unlock_bh(&qitem_lock); - return -ENOMEM; - } - item->handler = routine; - item->data = argument; - list_add_tail(&item->next_signal, &signal_queue_head); - spin_unlock_bh(&qitem_lock); - tasklet_schedule(&tipc_tasklet); - return 0; -} - -static void process_signal_queue(unsigned long dummy) -{ - struct queue_item *__volatile__ item; - struct list_head *l, *n; - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - spin_unlock_bh(&qitem_lock); - item->handler(item->data); - spin_lock_bh(&qitem_lock); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); -} - -int tipc_handler_start(void) -{ - tipc_queue_item_cache = - kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!tipc_queue_item_cache) - return -ENOMEM; - - INIT_LIST_HEAD(&signal_queue_head); - tasklet_enable(&tipc_tasklet); - handler_enabled = 1; - return 0; -} - -void tipc_handler_stop(void) -{ - struct list_head *l, *n; - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return; - } - handler_enabled = 0; - spin_unlock_bh(&qitem_lock); - - tasklet_kill(&tipc_tasklet); - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); - - kmem_cache_destroy(tipc_queue_item_cache); -} -- cgit v1.2.3 From aecb9bb89cbc08366c50a98d2d4751b381a6dc3b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 8 May 2014 08:54:39 +0800 Subject: tipc: rename enum names of node flags Rename node flags to action_flags as well as its enum names so that they can reflect its real meanings. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 6 +++--- net/tipc/node.c | 17 +++++++++-------- net/tipc/node.h | 29 +++++++++++++---------------- 3 files changed, 25 insertions(+), 27 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index dce2bef81720..26abb16e86ab 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1489,12 +1489,12 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) goto unlock_discard; /* Verify that communication with node is currently allowed */ - if ((n_ptr->flags & TIPC_NODE_DOWN) && + if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && msg_user(msg) == LINK_PROTOCOL && (msg_type(msg) == RESET_MSG || msg_type(msg) == ACTIVATE_MSG) && !msg_redundant_link(msg)) - n_ptr->flags &= ~TIPC_NODE_DOWN; + n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; if (tipc_node_blocked(n_ptr)) goto unlock_discard; @@ -1853,7 +1853,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) * peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ - l_ptr->owner->flags = TIPC_NODE_RESET; + l_ptr->owner->action_flags = TIPC_WAIT_OWN_LINKS_DOWN; } link_state_event(l_ptr, RESET_MSG); diff --git a/net/tipc/node.c b/net/tipc/node.c index 74efebc1cb7a..bb66a268b139 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -108,7 +108,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->flags = TIPC_NODE_DOWN; + n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; tipc_num_nodes++; @@ -267,7 +267,7 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) static void node_established_contact(struct tipc_node *n_ptr) { - n_ptr->flags |= TIPC_NODE_UP; + n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; n_ptr->bclink.acked = tipc_bclink_get_last_sent(); tipc_bclink_add_node(n_ptr->addr); @@ -311,7 +311,8 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Notify subscribers and prevent re-contact with node until * cleanup is done. */ - n_ptr->flags = TIPC_NODE_DOWN | TIPC_NODE_LOST; + n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN; } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) @@ -459,18 +460,18 @@ void tipc_node_unlock(struct tipc_node *node) int pkt_sz = 0; u32 addr = 0; - if (likely(!node->flags)) { + if (likely(!node->action_flags)) { spin_unlock_bh(&node->lock); return; } - if (node->flags & TIPC_NODE_LOST) { + if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->nsub, &nsub_list); - node->flags &= ~TIPC_NODE_LOST; + node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } - if (node->flags & TIPC_NODE_UP) { + if (node->action_flags & TIPC_NOTIFY_NODE_UP) { link = node->active_links[0]; - node->flags &= ~TIPC_NODE_UP; + node->action_flags &= ~TIPC_NOTIFY_NODE_UP; if (link) { pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) * ITEM_SIZE; diff --git a/net/tipc/node.h b/net/tipc/node.h index 38f710fb75dc..5454edf994c3 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -47,20 +47,17 @@ */ #define INVALID_NODE_SIG 0x10000 -/* Flags used to block (re)establishment of contact with a neighboring node - * TIPC_NODE_DOWN: indicate node is down and it's used to block the node's - * links until RESET or ACTIVE message arrives - * TIPC_NODE_RESET: indicate node is reset - * TIPC_NODE_LOST: indicate node is lost and it's used to notify subscriptions - * when node lock is released - * TIPC_NODE_UP: indicate node is up and it's used to deliver local name table - * when node lock is released +/* Flags used to take different actions according to flag type + * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down + * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up */ enum { - TIPC_NODE_DOWN = (1 << 1), - TIPC_NODE_RESET = (1 << 2), - TIPC_NODE_LOST = (1 << 3), - TIPC_NODE_UP = (1 << 4) + TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), + TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4) }; /** @@ -96,7 +93,7 @@ struct tipc_node_bclink { * @hash: links to adjacent nodes in unsorted hash chain * @active_links: pointers to active links to node * @links: pointers to all links to node - * @flags: bit mask of conditions preventing link establishment to node + * @action_flags: bit mask of different types of node actions * @bclink: broadcast-related info * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) @@ -111,7 +108,7 @@ struct tipc_node { struct hlist_node hash; struct tipc_link *active_links[2]; struct tipc_link *links[MAX_BEARERS]; - unsigned int flags; + unsigned int action_flags; struct tipc_node_bclink bclink; struct list_head list; int link_cnt; @@ -144,8 +141,8 @@ static inline void tipc_node_lock(struct tipc_node *node) static inline bool tipc_node_blocked(struct tipc_node *node) { - return (node->flags & (TIPC_NODE_DOWN | TIPC_NODE_LOST | - TIPC_NODE_RESET)); + return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } #endif -- cgit v1.2.3 From ca9cf06a0654fcf4b114a5a2d08723fc45d00317 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 8 May 2014 08:54:40 +0800 Subject: tipc: don't directly overwrite node action_flags Each node action flag should be set or cleared separately, instead we now set the whole flags variable in one shot, and it's turned out to be hard to see which other flags are affected. Therefore, for instance, we explicitly clear TIPC_WAIT_OWN_LINKS_DOWN bit in node_lost_contact(). Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- net/tipc/node.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 26abb16e86ab..2140837fbab9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1853,7 +1853,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) * peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ - l_ptr->owner->action_flags = TIPC_WAIT_OWN_LINKS_DOWN; + l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; } link_state_event(l_ptr, RESET_MSG); diff --git a/net/tipc/node.c b/net/tipc/node.c index bb66a268b139..facd5611e785 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -308,11 +308,13 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_link_reset_fragments(l_ptr); } + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; + /* Notify subscribers and prevent re-contact with node until * cleanup is done. */ - n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN | - TIPC_NOTIFY_NODE_DOWN; + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN; } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) -- cgit v1.2.3 From 6163a194e02ab6cab2758b277a0ae082378dd4e6 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:08 -0400 Subject: tipc: decrease connection flow control window Memory overhead when allocating big buffers for data transfer may be quite significant. E.g., truesize of a 64 KB buffer turns out to be 132 KB, 2 x the requested size. This invalidates the "worst case" calculation we have been using to determine the default socket receive buffer limit, which is based on the assumption that 1024x64KB = 67MB buffers may be queued up on a socket. Since TIPC connections cannot survive hitting the buffer limit, we have to compensate for this overhead. We do that in this commit by dividing the fix connection flow control window from 1024 (2*512) messages to 512 (2*256). Since older version nodes send out acks at 512 message intervals, compatibility with such nodes is guaranteed, although performance may be non-optimal in such cases. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/core.c | 7 ++++--- net/tipc/port.h | 9 +++++---- net/tipc/socket.c | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/core.c b/net/tipc/core.c index 57f8ae9aa466..676d18015dd8 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -154,10 +154,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; - sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; res = tipc_core_start(); if (res) diff --git a/net/tipc/port.h b/net/tipc/port.h index a00397393bd1..5dfd165df1d7 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -42,9 +42,10 @@ #include "msg.h" #include "node_subscr.h" -#define TIPC_FLOW_CONTROL_WIN 512 -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +#define TIPC_CONNACK_INTV 256 +#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) +#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) /** * struct tipc_port - TIPC port structure @@ -187,7 +188,7 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) static inline int tipc_port_congested(struct tipc_port *p_ptr) { - return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); + return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3f9912f87d0d..8685daf060f9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1101,7 +1101,7 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + (++port->conn_unacked >= TIPC_CONNACK_INTV)) tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } @@ -1210,7 +1210,7 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV)) tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } -- cgit v1.2.3 From 4f4482dcd9a0606a30541ff165ddaca64748299b Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:09 -0400 Subject: tipc: compensate for double accounting in socket rcv buffer The function net/core/sock.c::__release_sock() runs a tight loop to move buffers from the socket backlog queue to the receive queue. As a security measure, sk_backlog.len of the receiving socket is not set to zero until after the loop is finished, i.e., until the whole backlog queue has been transferred to the receive queue. During this transfer, the data that has already been moved is counted both in the backlog queue and the receive queue, hence giving an incorrect picture of the available queue space for new arriving buffers. This leads to unnecessary rejection of buffers by sk_add_backlog(), which in TIPC leads to unnecessarily broken connections. In this commit, we compensate for this double accounting by adding a counter that keeps track of it. The function socket.c::backlog_rcv() receives buffers one by one from __release_sock(), and adds them to the socket receive queue. If the transfer is successful, it increases a new atomic counter 'tipc_sock::dupl_rcvcnt' with 'truesize' of the transferred buffer. If a new buffer arrives during this transfer and finds the socket busy (owned), we attempt to add it to the backlog. However, when sk_add_backlog() is called, we adjust the 'limit' parameter with the value of the new counter, so that the risk of inadvertent rejection is eliminated. It should be noted that this change does not invalidate the original purpose of zeroing 'sk_backlog.len' after the full transfer. We set an upper limit for dupl_rcvcnt, so that if a 'wild' sender (i.e., one that doesn't respect the send window) keeps pumping in buffers to sk_add_backlog(), he will eventually reach an upper limit, (2 x TIPC_CONN_OVERLOAD_LIMIT). After that, no messages can be added to the backlog, and the connection will be broken. Ordinary, well- behaved senders will never reach this buffer limit at all. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 28 +++++++++++++++++++--------- net/tipc/socket.h | 2 ++ 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8685daf060f9..249500614568 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,5 +1,5 @@ /* - * net/tipc/socket.c: TIPC socket API +* net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems @@ -45,7 +45,7 @@ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -static int backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); @@ -196,11 +196,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock->state = state; sock_init_data(sock, sk); - sk->sk_backlog_rcv = backlog_rcv; + sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; - tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); if (sock->state == SS_READY) { @@ -1416,7 +1417,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /** - * backlog_rcv - handle incoming message from backlog queue + * tipc_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @buf: message * @@ -1424,13 +1425,18 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) * * Returns 0 */ -static int backlog_rcv(struct sock *sk, struct sk_buff *buf) +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { u32 res; + struct tipc_sock *tsk = tipc_sk(sk); res = filter_rcv(sk, buf); - if (res) + if (unlikely(res)) tipc_reject_msg(buf, res); + + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(buf->truesize, &tsk->dupl_rcvcnt); + return 0; } @@ -1445,8 +1451,9 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf) */ u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) { + struct tipc_sock *tsk = tipc_sk(sk); u32 res; - + uint limit; /* * Process message if socket is unlocked; otherwise add to backlog queue * @@ -1457,7 +1464,10 @@ u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) + if (sk->sk_backlog.len == 0) + atomic_set(&tsk->dupl_rcvcnt, 0); + limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); + if (sk_add_backlog(sk, buf, limit)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 74e5c7f195a6..86c27cc51e33 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,12 +44,14 @@ * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue */ struct tipc_sock { struct sock sk; struct tipc_port port; unsigned int conn_timeout; + atomic_t dupl_rcvcnt; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) -- cgit v1.2.3 From ec37dcd382b4f3673bfbf36ccd348ef48f98ffe3 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:10 -0400 Subject: tipc: don't record link RESET or ACTIVATE messages as traffic In the current code, all incoming LINK_PROTOCOL messages, irrespective of type, nudge the "last message received" checkpoint, informing the link state machine that a message was received from the peer since last supervision timeout event. This inhibits the link from starting probing the peer unnecessarily. However, not only STATE messages are recorded as legitimate incoming traffic this way, but even RESET and ACTIVATE messages, which in reality are there to inform the link that the peer endpoint has been reset. At the same time, some RESET messages may be dropped instead of causing a link reset. This happens when the link endpoint thinks it is fully up and working, and the session number of the RESET is lower than or equal to the current link session. In such cases the RESET is perceived as a delayed remnant from an earlier session, or the current one, and dropped. Now, if a TIPC module is removed and then immediately reinserted, e.g. when using a script, RESET messages may arrive at the peer link endpoint before this one has had time to discover the failure. The RESET may be dropped because of the session number, but only after it has been recorded as a legitimate traffic event. Hence, the receiving link will not start probing, and not discover that the peer endpoint is down, at the same time ignoring the periodic RESET messages coming from that endpoint. We have ended up in a stale state where a failed link cannot be re-established. In this commit, we remedy this by nudging the checkpoint only for received STATE messages, not for RESET or ACTIVATE messages. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 2140837fbab9..6cf7938784c6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1831,9 +1831,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (l_ptr->exp_msg_count) goto exit; - /* record unnumbered packet arrival (force mismatch on next timeout) */ - l_ptr->checkpoint--; - if (l_ptr->net_plane != msg_net_plane(msg)) if (tipc_own_addr > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); @@ -1909,6 +1906,10 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; } + + /* Record reception; force mismatch at next timeout: */ + l_ptr->checkpoint--; + link_state_event(l_ptr, TRAFFIC_MSG_EVT); l_ptr->stats.recv_states++; if (link_reset_unknown(l_ptr)) -- cgit v1.2.3 From 5074ab89c555dd130ceeac129546670423d634b8 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:11 -0400 Subject: tipc: mark head of reassembly buffer as non-linear The message reassembly function does not update the 'len' and 'data_len' fields of the head skbuff correctly when fragments are chained to it. This may sometimes lead to obsure errors, such as fragment reordering when we receive fragments which are cloned buffers. This commit fixes this, by ensuring that the two fields are updated correctly. Suggested-by: Eric Dumazet Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 6cf7938784c6..9272d4cc0225 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2341,6 +2341,8 @@ int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, (*tail)->next = frag; *tail = frag; (*head)->truesize += frag->truesize; + (*head)->data_len += frag->len; + (*head)->len += frag->len; } if (fragid == LAST_FRAGMENT) { *fbuf = *head; -- cgit v1.2.3 From 37e22164a8a3c39bdad45aa463b1e69a1fdf4110 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:12 -0400 Subject: tipc: rename and move message reassembly function The function tipc_link_frag_rcv() is in reality a re-entrant generic message reassemby function that has nothing in particular to do with the link, where it is defined now. This becomes obvious when we see the need to call the function from other places in the code. In this commit rename it to tipc_buf_append() and move it to the file msg.c. We also simplify its signature by moving the tail pointer to the control block of the head buffer, hence making the head buffer self-contained. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 10 +++------ net/tipc/core.h | 1 + net/tipc/link.c | 67 +++++--------------------------------------------------- net/tipc/link.h | 14 ++---------- net/tipc/msg.c | 55 +++++++++++++++++++++++++++++++++++++++++++++- net/tipc/msg.h | 5 ++++- net/tipc/node.c | 7 +++--- net/tipc/node.h | 6 ++--- 8 files changed, 74 insertions(+), 91 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a0978d0890cb..671f9817b4f4 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -506,18 +506,14 @@ receive: tipc_node_unlock(node); tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { - int ret; - ret = tipc_link_frag_rcv(&node->bclink.reasm_head, - &node->bclink.reasm_tail, - &buf); - if (ret == LINK_REASM_ERROR) + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; - if (ret == LINK_REASM_COMPLETE) { + if (buf) { bcl->stats.recv_fragmented++; - /* Point msg to inner header */ msg = buf_msg(buf); tipc_bclink_unlock(); goto receive; diff --git a/net/tipc/core.h b/net/tipc/core.h index ae55d37267e6..2e00682bc455 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -187,6 +187,7 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; bool deferred; + struct sk_buff *tail; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/link.c b/net/tipc/link.c index 9272d4cc0225..24d058796cd9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -398,9 +398,8 @@ static void link_release_outqueue(struct tipc_link *l_ptr) */ void tipc_link_reset_fragments(struct tipc_link *l_ptr) { - kfree_skb(l_ptr->reasm_head); - l_ptr->reasm_head = NULL; - l_ptr->reasm_tail = NULL; + kfree_skb(l_ptr->reasm_buf); + l_ptr->reasm_buf = NULL; } /** @@ -1573,17 +1572,12 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) } msg = buf_msg(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { - int rc; - l_ptr->stats.recv_fragments++; - rc = tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); - if (rc == LINK_REASM_COMPLETE) { + if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) { l_ptr->stats.recv_fragmented++; msg = buf_msg(buf); } else { - if (rc == LINK_REASM_ERROR) + if (!l_ptr->reasm_buf) tipc_link_reset(l_ptr); tipc_node_unlock(n_ptr); continue; @@ -2169,9 +2163,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } if (msg_user(msg) == MSG_FRAGMENTER) { l_ptr->stats.recv_fragments++; - tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); + tipc_buf_append(&l_ptr->reasm_buf, &buf); } } exit: @@ -2309,55 +2301,6 @@ static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) return dsz; } -/* tipc_link_frag_rcv(): Called with node lock on. Returns - * the reassembled buffer if message is complete. - */ -int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff **fbuf) -{ - struct sk_buff *frag = *fbuf; - struct tipc_msg *msg = buf_msg(frag); - u32 fragid = msg_type(msg); - bool headstolen; - int delta; - - skb_pull(frag, msg_hdr_sz(msg)); - if (fragid == FIRST_FRAGMENT) { - if (*head || skb_unclone(frag, GFP_ATOMIC)) - goto out_free; - *head = frag; - skb_frag_list_init(*head); - *fbuf = NULL; - return 0; - } else if (*head && - skb_try_coalesce(*head, frag, &headstolen, &delta)) { - kfree_skb_partial(frag, headstolen); - } else { - if (!*head) - goto out_free; - if (!skb_has_frag_list(*head)) - skb_shinfo(*head)->frag_list = frag; - else - (*tail)->next = frag; - *tail = frag; - (*head)->truesize += frag->truesize; - (*head)->data_len += frag->len; - (*head)->len += frag->len; - } - if (fragid == LAST_FRAGMENT) { - *fbuf = *head; - *tail = *head = NULL; - return LINK_REASM_COMPLETE; - } - *fbuf = NULL; - return 0; -out_free: - pr_warn_ratelimited("Link unable to reassemble fragmented message\n"); - kfree_skb(*fbuf); - *fbuf = NULL; - return LINK_REASM_ERROR; -} - static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) { if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 7ba73fa6b81e..200d518b218e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -40,11 +40,6 @@ #include "msg.h" #include "node.h" -/* Link reassembly status codes - */ -#define LINK_REASM_ERROR -1 -#define LINK_REASM_COMPLETE 1 - /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -140,8 +135,7 @@ struct tipc_stats { * @next_out: ptr to first unsent outbound message in queue * @waiting_ports: linked list of ports waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_head: list head of partially reassembled inbound message fragments - * @reasm_tail: last fragment received + * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity */ struct tipc_link { @@ -204,8 +198,7 @@ struct tipc_link { /* Fragmentation/reassembly */ u32 long_msg_seq_no; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; + struct sk_buff *reasm_buf; /* Statistics */ struct tipc_stats stats; @@ -242,9 +235,6 @@ int tipc_link_iovec_xmit_fast(struct tipc_port *sender, struct iovec const *msg_sect, unsigned int len, u32 destnode); void tipc_link_bundle_rcv(struct sk_buff *buf); -int tipc_link_frag_rcv(struct sk_buff **reasm_head, - struct sk_buff **reasm_tail, - struct sk_buff **fbuf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_queue(struct tipc_link *l_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e525f8ce1dee..8be6e94a1ca9 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -99,3 +99,56 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, } return dsz; } + +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * Let first buffer become head buffer + * Returns 1 and sets *buf to headbuf if chain is complete, otherwise 0 + * Leaves headbuf pointer at NULL if failure + */ +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) +{ + struct sk_buff *head = *headbuf; + struct sk_buff *frag = *buf; + struct sk_buff *tail; + struct tipc_msg *msg = buf_msg(frag); + u32 fragid = msg_type(msg); + bool headstolen; + int delta; + + skb_pull(frag, msg_hdr_sz(msg)); + + if (fragid == FIRST_FRAGMENT) { + if (head || skb_unclone(frag, GFP_ATOMIC)) + goto out_free; + head = *headbuf = frag; + skb_frag_list_init(head); + return 0; + } + if (!head) + goto out_free; + tail = TIPC_SKB_CB(head)->tail; + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { + kfree_skb_partial(frag, headstolen); + } else { + if (!skb_has_frag_list(head)) + skb_shinfo(head)->frag_list = frag; + else + tail->next = frag; + head->truesize += frag->truesize; + head->data_len += frag->len; + head->len += frag->len; + TIPC_SKB_CB(head)->tail = frag; + } + if (fragid == LAST_FRAGMENT) { + *buf = head; + TIPC_SKB_CB(head)->tail = NULL; + *headbuf = NULL; + return 1; + } + *buf = NULL; + return 0; +out_free: + pr_warn_ratelimited("Unable to build fragment list\n"); + kfree_skb(*buf); + return 0; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 76d1269b9443..503511903d1d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, Ericsson AB + * Copyright (c) 2000-2007, 2014, Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -711,4 +711,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, unsigned int len, int max_size, struct sk_buff **buf); + +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); + #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index facd5611e785..5b44c3041be4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -286,10 +286,9 @@ static void node_lost_contact(struct tipc_node *n_ptr) kfree_skb_list(n_ptr->bclink.deferred_head); n_ptr->bclink.deferred_size = 0; - if (n_ptr->bclink.reasm_head) { - kfree_skb(n_ptr->bclink.reasm_head); - n_ptr->bclink.reasm_head = NULL; - n_ptr->bclink.reasm_tail = NULL; + if (n_ptr->bclink.reasm_buf) { + kfree_skb(n_ptr->bclink.reasm_buf); + n_ptr->bclink.reasm_buf = NULL; } tipc_bclink_remove_node(n_ptr->addr); diff --git a/net/tipc/node.h b/net/tipc/node.h index 5454edf994c3..9087063793f2 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -69,8 +69,7 @@ enum { * @deferred_size: number of OOS b'cast messages in deferred queue * @deferred_head: oldest OOS b'cast message received from node * @deferred_tail: newest OOS b'cast message received from node - * @reasm_head: broadcast reassembly queue head from node - * @reasm_tail: last broadcast fragment received from node + * @reasm_buf: broadcast reassembly queue head from node * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node_bclink { @@ -81,8 +80,7 @@ struct tipc_node_bclink { u32 deferred_size; struct sk_buff *deferred_head; struct sk_buff *deferred_tail; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; + struct sk_buff *reasm_buf; bool recv_permitted; }; -- cgit v1.2.3 From 38504c28a201a80d12a6a0f821fecb331cb1f223 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:13 -0400 Subject: tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 36 +++++++----------------------------- net/tipc/bearer.h | 35 +++++++++++++++++++---------------- net/tipc/core.h | 1 + net/tipc/discover.c | 2 +- net/tipc/eth_media.c | 51 ++++++++++++++++++++++++++++++++------------------- net/tipc/ib_media.c | 34 +++++++++++++++++++++++----------- 6 files changed, 83 insertions(+), 76 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f3259d4133b6..264474394f9f 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -411,28 +411,6 @@ int tipc_disable_bearer(const char *name) return res; } - -/* tipc_l2_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. - */ -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac) -{ - int len = b->media->hwaddr_len; - - if (unlikely(sizeof(a->value) < len)) { - WARN_ONCE(1, "Media length invalid\n"); - return; - } - - memcpy(a->value, mac, len); - memset(a->value + len, 0, sizeof(a->value) - len); - a->media_id = b->media->type_id; - a->broadcast = !memcmp(mac, b->bcast_addr.value, len); -} - int tipc_enable_l2_media(struct tipc_bearer *b) { struct net_device *dev; @@ -443,21 +421,21 @@ int tipc_enable_l2_media(struct tipc_bearer *b) if (!dev) return -ENODEV; - /* Associate TIPC bearer with Ethernet bearer */ + /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); - memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = 1; b->mtu = dev->mtu; - tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } -/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * Mark L2 bearer as inactive so that incoming buffers are thrown away, * then get worker thread to complete bearer cleanup. (Can't do cleanup * here because cleanup code needs to sleep and caller holds spinlocks.) */ @@ -473,7 +451,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) } /** - * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address @@ -597,7 +575,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_reset_bearer(b_ptr); break; case NETDEV_CHANGEADDR: - tipc_l2_media_addr_set(b_ptr, &b_ptr->addr, + b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); tipc_reset_bearer(b_ptr); break; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a983b3005e71..78fccc49de23 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -42,14 +42,12 @@ #define MAX_BEARERS 2 #define MAX_MEDIA 2 -/* - * Identifiers associated with TIPC message header media address info - * - * - address info field is 20 bytes long - * - media type identifier located at offset 3 - * - remaining bytes vary according to media type +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 20 +#define TIPC_MEDIA_ADDR_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 /* @@ -77,9 +75,10 @@ struct tipc_bearer; * @send_msg: routine which handles buffer transmission * @enable_media: routine which enables a media * @disable_media: routine which disables a media - * @addr2str: routine which converts media address to string - * @addr2msg: routine which converts media address to protocol message area - * @msg2addr: routine which converts media address from protocol message area + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -93,10 +92,16 @@ struct tipc_media { struct tipc_media_addr *dest); int (*enable_media)(struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); - int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); - int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(const struct tipc_bearer *b_ptr, - struct tipc_media_addr *a, char *msg_area); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *raw); u32 priority; u32 tolerance; u32 window; @@ -175,8 +180,6 @@ int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac); int tipc_enable_l2_media(struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, diff --git a/net/tipc/core.h b/net/tipc/core.h index 2e00682bc455..bb26ed1ee966 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -57,6 +57,7 @@ #include #include #include +#include #define TIPC_MOD_VER "2.0.0" diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bd35c4a0746f..b15cbedfea13 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -83,7 +83,7 @@ static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, msg_set_node_sig(msg, tipc_random); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); + b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } /** diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 67cf3f935dba..5e1426f1751f 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,7 +1,7 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, 2013, Ericsson AB + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. * @@ -37,39 +37,52 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ +#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ -/* convert Ethernet address to string */ -static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf, - int str_size) +/* Convert Ethernet address (media address format) to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) { - if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; - sprintf(str_buf, "%pM", a->value); + sprintf(strbuf, "%pM", addr->value); return 0; } -/* convert Ethernet address format to message header format */ -static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } -/* convert message header address format to Ethernet format */ -static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw mac address format to media addr format */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) - return 1; + char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); return 0; } +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + /* Skip past preamble: */ + msg += ETH_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); +} + /* Ethernet media registration info */ struct tipc_media eth_media_info = { .send_msg = tipc_l2_send_msg, @@ -78,6 +91,7 @@ struct tipc_media eth_media_info = { .addr2str = tipc_eth_addr2str, .addr2msg = tipc_eth_addr2msg, .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -85,4 +99,3 @@ struct tipc_media eth_media_info = { .hwaddr_len = ETH_ALEN, .name = "eth" }; - diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 844a77e25828..8522eef9c136 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,7 +42,7 @@ #include "core.h" #include "bearer.h" -/* convert InfiniBand address to string */ +/* convert InfiniBand address (media address format) media address to string */ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { @@ -54,23 +54,35 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, return 0; } -/* convert InfiniBand address format to message header format */ -static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; - memcpy(msg_area, a->value, INFINIBAND_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } -/* convert message header address format to InfiniBand format */ -static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw InfiniBand address format to media addr format */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - tipc_l2_media_addr_set(tb_ptr, a, msg_area); + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); return 0; } +/* Convert discovery msg addr format to InfiniBand media addr format */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + /* InfiniBand media registration info */ struct tipc_media ib_media_info = { .send_msg = tipc_l2_send_msg, @@ -79,6 +91,7 @@ struct tipc_media ib_media_info = { .addr2str = tipc_ib_addr2str, .addr2msg = tipc_ib_addr2msg, .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -86,4 +99,3 @@ struct tipc_media ib_media_info = { .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; - -- cgit v1.2.3 From c82910e2a8d6fc9dd321a1f30dd4e89fb779cfe1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:14 -0400 Subject: tipc: clean up neigbor discovery message reception The function tipc_disc_rcv(), which is handling received neighbor discovery messages, is perceived as messy, and it is hard to verify its correctness by code inspection. The fact that the task it is set to resolve is fairly complex does not make the situation better. In this commit we try to take a more systematic approach to the problem. We define a decision machine which takes three state flags as input, and produces three action flags as output. We then walk through all permutations of the state flags, and for each of them we describe verbally what is going on, plus that we set zero or more of the action flags. The action flags indicate what should be done once the decision machine has finished its job, while the last part of the function deals with performing those actions. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/discover.c | 219 ++++++++++++++++++++++++++-------------------------- 1 file changed, 111 insertions(+), 108 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index b15cbedfea13..aa722a42ef8b 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -106,147 +106,150 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, } /** - * tipc_disc_rcv - handle incoming link setup message (request or response) + * tipc_disc_rcv - handle incoming discovery message (request or response) * @buf: buffer containing message - * @b_ptr: bearer that message arrived on + * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) +void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) { - struct tipc_node *n_ptr; + struct tipc_node *node; struct tipc_link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr maddr; struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); - u32 dest = msg_dest_domain(msg); - u32 orig = msg_prevnode(msg); + u32 ddom = msg_dest_domain(msg); + u32 onode = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); - u32 type = msg_type(msg); + u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); - int addr_mismatch; - int link_fully_up; - - media_addr.broadcast = 1; - b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool accept_addr = false; + bool accept_sign = false; + bool respond = false; + + bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg)); kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ if (net_id != tipc_net_id) return; - if (media_addr.broadcast) + if (maddr.broadcast) return; - if (!tipc_addr_domain_valid(dest)) + if (!tipc_addr_domain_valid(ddom)) return; - if (!tipc_addr_node_valid(orig)) + if (!tipc_addr_node_valid(onode)) return; - if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) - disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); + + if (in_own_node(onode)) { + if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) + disc_dupl_alert(bearer, tipc_own_addr, &maddr); return; } - if (!tipc_in_scope(dest, tipc_own_addr)) + if (!tipc_in_scope(ddom, tipc_own_addr)) return; - if (!tipc_in_scope(b_ptr->domain, orig)) + if (!tipc_in_scope(bearer->domain, onode)) return; - /* Locate structure corresponding to requesting node */ - n_ptr = tipc_node_find(orig); - if (!n_ptr) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - tipc_node_lock(n_ptr); + /* Locate, or if necessary, create, node: */ + node = tipc_node_find(onode); + if (!node) + node = tipc_node_create(onode); + if (!node) + return; - /* Prepare to validate requesting node's signature and media address */ - link = n_ptr->links[b_ptr->identity]; - addr_mismatch = (link != NULL) && - memcmp(&link->media_addr, &media_addr, sizeof(media_addr)); + tipc_node_lock(node); + link = node->links[bearer->identity]; - /* - * Ensure discovery message's signature is correct - * - * If signature is incorrect and there is no working link to the node, - * accept the new signature but invalidate all existing links to the - * node so they won't re-activate without a new discovery message. - * - * If signature is incorrect and the requested link to the node is - * working, accept the new signature. (This is an instance of delayed - * rediscovery, where a link endpoint was able to re-establish contact - * with its peer endpoint on a node that rebooted before receiving a - * discovery message from that node.) - * - * If signature is incorrect and there is a working link to the node - * that is not the requested link, reject the request (must be from - * a duplicate node). - */ - if (signature != n_ptr->signature) { - if (n_ptr->working_links == 0) { - struct tipc_link *curr_link; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - curr_link = n_ptr->links[i]; - if (curr_link) { - memset(&curr_link->media_addr, 0, - sizeof(media_addr)); - tipc_link_reset(curr_link); - } - } - addr_mismatch = (link != NULL); - } else if (tipc_link_is_up(link) && !addr_mismatch) { - /* delayed rediscovery */ - } else { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } - n_ptr->signature = signature; + /* Prepare to validate requesting node's signature and media address */ + sign_match = (signature == node->signature); + addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr)); + link_up = link && tipc_link_is_up(link); + + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Do nothing. */ + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. + * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be an cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + respond = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first). + * - The peer came back so fast that our side has not + * discovered it yet. Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + accept_sign = true; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + accept_sign = true; + respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address. + */ + accept_sign = true; + accept_addr = true; + respond = true; } - /* - * Ensure requesting node's media address is correct - * - * If media address doesn't match and the link is working, reject the - * request (must be from a duplicate node). - * - * If media address doesn't match and the link is not working, accept - * the new media address and reset the link to ensure it starts up - * cleanly. - */ - if (addr_mismatch) { - if (tipc_link_is_up(link)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } else { - memcpy(&link->media_addr, &media_addr, - sizeof(media_addr)); - tipc_link_reset(link); - } - } + if (accept_sign) + node->signature = signature; - /* Create a link endpoint for this bearer, if necessary */ - if (!link) { - link = tipc_link_create(n_ptr, b_ptr, &media_addr); - if (!link) { - tipc_node_unlock(n_ptr); - return; + if (accept_addr) { + if (!link) + link = tipc_link_create(node, bearer, &maddr); + if (link) { + memcpy(&link->media_addr, &maddr, sizeof(maddr)); + tipc_link_reset(link); + } else { + respond = false; } } - /* Accept discovery message & send response, if necessary */ - link_fully_up = link_working_working(link); - - if ((type == DSC_REQ_MSG) && !link_fully_up) { + /* Send response, if necessary */ + if (respond && (mtyp == DSC_REQ_MSG)) { rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_disc_init_msg(rbuf, DSC_RESP_MSG, b_ptr); - tipc_bearer_send(b_ptr->identity, rbuf, &media_addr); + tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } - - tipc_node_unlock(n_ptr); + tipc_node_unlock(node); } /** -- cgit v1.2.3 From 9816f0615d549b948a76e6d2385159b4366e4658 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 May 2014 05:39:15 -0400 Subject: tipc: merge port message reception into socket reception function In order to reduce complexity and save a call level during message reception at port/socket level, we remove the function tipc_port_rcv() and merge its functionality into tipc_sk_rcv(). Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- net/tipc/net.c | 3 ++- net/tipc/port.c | 39 ++++--------------------------------- net/tipc/port.h | 1 - net/tipc/socket.c | 57 ++++++++++++++++++++++++++++++++++++------------------- net/tipc/socket.h | 2 +- 6 files changed, 46 insertions(+), 59 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 24d058796cd9..ad2c57f5868d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -37,6 +37,7 @@ #include "core.h" #include "link.h" #include "port.h" +#include "socket.h" #include "name_distr.h" #include "discover.h" #include "config.h" @@ -1590,7 +1591,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: tipc_node_unlock(n_ptr); - tipc_port_rcv(buf); + tipc_sk_rcv(buf); continue; case MSG_BUNDLER: l_ptr->stats.recv_bundles++; diff --git a/net/tipc/net.c b/net/tipc/net.c index f8fc95d58c0d..f64375e7f99f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -39,6 +39,7 @@ #include "name_distr.h" #include "subscr.h" #include "port.h" +#include "socket.h" #include "node.h" #include "config.h" @@ -141,7 +142,7 @@ void tipc_net_route_msg(struct sk_buff *buf) if (msg_mcast(msg)) tipc_port_mcast_rcv(buf, NULL); else if (msg_destport(msg)) - tipc_port_rcv(buf); + tipc_sk_rcv(buf); else net_route_named_msg(buf); return; diff --git a/net/tipc/port.c b/net/tipc/port.c index 5c14c7801ee6..5fd7acce01ea 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -165,7 +165,7 @@ void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) msg_set_destnode(msg, tipc_own_addr); if (dp->count == 1) { msg_set_destport(msg, dp->ports[0]); - tipc_port_rcv(buf); + tipc_sk_rcv(buf); tipc_port_list_free(dp); return; } @@ -180,7 +180,7 @@ void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) if ((index == 0) && (cnt != 0)) item = item->next; msg_set_destport(buf_msg(b), item->ports[index]); - tipc_port_rcv(b); + tipc_sk_rcv(b); } } exit: @@ -343,7 +343,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) /* send returned message & dispose of rejected message */ src_node = msg_prevnode(msg); if (in_own_node(src_node)) - tipc_port_rcv(rbuf); + tipc_sk_rcv(rbuf); else tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); exit: @@ -754,37 +754,6 @@ int tipc_port_shutdown(u32 ref) return tipc_port_disconnect(ref); } -/** - * tipc_port_rcv - receive message from lower layer and deliver to port user - */ -int tipc_port_rcv(struct sk_buff *buf) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg = buf_msg(buf); - u32 destport = msg_destport(msg); - u32 dsz = msg_data_sz(msg); - u32 err; - - /* forward unresolved named message */ - if (unlikely(!destport)) { - tipc_net_route_msg(buf); - return dsz; - } - - /* validate destination & pass to port, otherwise reject message */ - p_ptr = tipc_port_lock(destport); - if (likely(p_ptr)) { - err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf); - tipc_port_unlock(p_ptr); - if (likely(!err)) - return dsz; - } else { - err = TIPC_ERR_NO_PORT; - } - - return tipc_reject_msg(buf, err); -} - /* * tipc_port_iovec_rcv: Concatenate and deliver sectioned * message for this node. @@ -798,7 +767,7 @@ static int tipc_port_iovec_rcv(struct tipc_port *sender, res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (likely(buf)) - tipc_port_rcv(buf); + tipc_sk_rcv(buf); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 5dfd165df1d7..cf4ca5b1d9a4 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -135,7 +135,6 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); /* * TIPC messaging routines */ -int tipc_port_rcv(struct sk_buff *buf); int tipc_send(struct tipc_port *port, struct iovec const *msg_sect, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 249500614568..ac08966f2858 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1442,39 +1442,56 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) /** * tipc_sk_rcv - handle incoming message - * @sk: socket receiving message - * @buf: message - * - * Called with port lock already taken. - * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * @buf: buffer containing arriving message + * Consumes buffer + * Returns 0 if success, or errno: -EHOSTUNREACH */ -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) +int tipc_sk_rcv(struct sk_buff *buf) { - struct tipc_sock *tsk = tipc_sk(sk); - u32 res; + struct tipc_sock *tsk; + struct tipc_port *port; + struct sock *sk; + u32 dport = msg_destport(buf_msg(buf)); + int err = TIPC_OK; uint limit; - /* - * Process message if socket is unlocked; otherwise add to backlog queue - * - * This code is based on sk_receive_skb(), but must be distinct from it - * since a TIPC-specific filter/reject mechanism is utilized - */ + + /* Forward unresolved named message */ + if (unlikely(!dport)) { + tipc_net_route_msg(buf); + return 0; + } + + /* Validate destination */ + port = tipc_port_lock(dport); + if (unlikely(!port)) { + err = TIPC_ERR_NO_PORT; + goto exit; + } + + tsk = tipc_port_to_sock(port); + sk = &tsk->sk; + + /* Queue message */ bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { - res = filter_rcv(sk, buf); + err = filter_rcv(sk, buf); } else { if (sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); if (sk_add_backlog(sk, buf, limit)) - res = TIPC_ERR_OVERLOAD; - else - res = TIPC_OK; + err = TIPC_ERR_OVERLOAD; } + bh_unlock_sock(sk); + tipc_port_unlock(port); - return res; + if (likely(!err)) + return 0; +exit: + tipc_reject_msg(buf, err); + return -EHOSTUNREACH; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 86c27cc51e33..3afcd2a70b31 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -69,6 +69,6 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf); +int tipc_sk_rcv(struct sk_buff *buf); #endif -- cgit v1.2.3 From 85d3fc9418dc5b357290de89b99c9a8bdd9eef89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 May 2014 15:55:12 -0400 Subject: tipc: Don't reset the timeout when restarting As it may then take longer than what the user specified using setsockopt(SO_RCVTIMEO). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ac08966f2858..08d87fc80b10 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -985,10 +985,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } -static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) +static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); + long timeo = *timeop; int err; for (;;) { @@ -1013,6 +1014,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) break; } finish_wait(sk_sleep(sk), &wait); + *timeop = timeo; return err; } @@ -1056,7 +1058,7 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; @@ -1154,7 +1156,7 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; -- cgit v1.2.3 From 02c00c2ab0807ec64b480e899d2619e64566a0af Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 9 Jun 2014 11:08:18 -0500 Subject: tipc: fix potential bug in function tipc_backlog_rcv In commit 4f4482dcd9a0606a30541ff165ddaca64748299b ("tipc: compensate for double accounting in socket rcv buffer") we access 'truesize' of a received buffer after it might have been released by the function filter_rcv(). In this commit we correct this by reading the value of 'truesize' to the stack before delivering the buffer to filter_rcv(). Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 08d87fc80b10..ef0475568f9e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,5 +1,5 @@ /* -* net/tipc/socket.c: TIPC socket API + * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems @@ -1431,13 +1431,14 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { u32 res; struct tipc_sock *tsk = tipc_sk(sk); + uint truesize = buf->truesize; res = filter_rcv(sk, buf); if (unlikely(res)) tipc_reject_msg(buf, res); if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(buf->truesize, &tsk->dupl_rcvcnt); + atomic_add(truesize, &tsk->dupl_rcvcnt); return 0; } -- cgit v1.2.3 From bad93e9d4eeb0d2d6b79204d6cedc7f2e7b256f1 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 12 Jun 2014 01:36:26 +0300 Subject: net: add __pskb_copy_fclone and pskb_copy_for_clone There are several instances where a pskb_copy or __pskb_copy is immediately followed by an skb_clone. Add a couple of new functions to allow the copy skb to be allocated from the fclone cache and thus speed up subsequent skb_clone calls. Cc: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Cc: Arvid Brodin Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Cc: Eric Dumazet Reviewed-by: Christoph Paasch Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +++++++++++++++- net/batman-adv/distributed-arp-table.c | 2 +- net/batman-adv/network-coding.c | 2 +- net/bluetooth/hci_sock.c | 6 +++--- net/core/skbuff.c | 14 +++++++++----- net/nfc/llcp_core.c | 4 ++-- net/nfc/rawsock.c | 4 ++-- net/tipc/bcast.c | 2 +- 8 files changed, 34 insertions(+), 16 deletions(-) (limited to 'net/tipc') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c705808bef9c..1f50bfe2243d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -744,7 +744,13 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask); +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone); +static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, headroom, gfp_mask, false); +} int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, @@ -2238,6 +2244,14 @@ static inline struct sk_buff *pskb_copy(struct sk_buff *skb, return __pskb_copy(skb, skb_headroom(skb), gfp_mask); } + +static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true); +} + + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index dcd99b2bea3c..f2c066b21716 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (!neigh_node) goto free_orig; - tmp_skb = pskb_copy(skb, GFP_ATOMIC); + tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 40a2fc4bcf4c..8d04d174669e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1344,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct ethhdr *ethhdr; /* Copy skb header to change the mac header */ - skb = pskb_copy(skb, GFP_ATOMIC); + skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!skb) return; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f608bffdb8b9..80d25c150a65 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (!skb_copy) { /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; @@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) struct hci_mon_hdr *hdr; /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 05f4bef2ce12..b9e85e6cb26a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) EXPORT_SYMBOL(skb_copy); /** - * __pskb_copy - create copy of an sk_buff with private head. + * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority + * @fclone: if true allocate the copy of the skb from the fclone + * cache instead of the head cache; it is recommended to set this + * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when @@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy); * The returned buffer has a reference count of 1. */ -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); + struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; @@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) out: return n; } -EXPORT_SYMBOL(__pskb_copy); +EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index f6278da68763..51e788797317 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -680,8 +680,8 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (skb_copy == NULL) continue; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 55eefee311eb..11c3544ea546 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -378,8 +378,8 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, sk_for_each(sk, &raw_sk_list.head) { if (!skb_copy) { - skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 671f9817b4f4..26631679a1fa 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -653,7 +653,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ - tbuf = pskb_copy(buf, GFP_ATOMIC); + tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); -- cgit v1.2.3