From 543b921b475ad2e1897d5e7784437af238b33705 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Thu, 17 Nov 2016 12:48:53 +0000 Subject: cfg80211: get rid of name indirection trick for ieee80211_get_channel() The comment on the name indirection suggested an issue but turned out to be untrue. Digging in older kernel version showed issue with ipw2x00 but that is no longer true so get rid of the name indirection. Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 814be4b4200c..ca2ac1ce5862 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3955,26 +3955,15 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band); */ int ieee80211_frequency_to_channel(int freq); -/* - * Name indirection necessary because the ieee80211 code also has - * a function named "ieee80211_get_channel", so if you include - * cfg80211's header file you get cfg80211's version, if you try - * to include both header files you'll (rightfully!) get a symbol - * clash. - */ -struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, - int freq); /** * ieee80211_get_channel - get channel struct from wiphy for specified frequency + * * @wiphy: the struct wiphy to get the channel for * @freq: the center frequency of the channel + * * Return: The channel struct from @wiphy at @freq. 
*/ -static inline struct ieee80211_channel * -ieee80211_get_channel(struct wiphy *wiphy, int freq) -{ - return __ieee80211_get_channel(wiphy, freq); -} +struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq); /** * ieee80211_get_response_rate - get basic rate for a given rate -- cgit v1.2.3 From 66b91d2cd0344c417194596ef6e387e52be69e57 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 28 Dec 2016 09:26:34 -0200 Subject: sctp: remove return value from sctp_packet_init/config There is no reason to use this cascading. It doesn't add anything. Let's remove it and simplify. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 +++---- net/sctp/output.c | 14 +++++--------- net/sctp/sm_statefuns.c | 5 +++-- 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 92daabdc007d..87d56cc80a3c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -722,10 +722,9 @@ struct sctp_packet { ipfragok:1; /* So let ip fragment this packet */ }; -struct sctp_packet *sctp_packet_init(struct sctp_packet *, - struct sctp_transport *, - __u16 sport, __u16 dport); -struct sctp_packet *sctp_packet_config(struct sctp_packet *, __u32 vtag, int); +void sctp_packet_init(struct sctp_packet *, struct sctp_transport *, + __u16 sport, __u16 dport); +void sctp_packet_config(struct sctp_packet *, __u32 vtag, int); sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *, struct sctp_chunk *, int, gfp_t); sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *, diff --git a/net/sctp/output.c b/net/sctp/output.c index f5320a87341e..07ab5062e541 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -81,8 +81,8 @@ static void sctp_packet_reset(struct sctp_packet *packet) /* Config a packet. * This appears to be a followup set of initializations. 
*/ -struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, - __u32 vtag, int ecn_capable) +void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, + int ecn_capable) { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; @@ -123,14 +123,12 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, if (chunk) sctp_packet_append_chunk(packet, chunk); } - - return packet; } /* Initialize the packet structure. */ -struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, - struct sctp_transport *transport, - __u16 sport, __u16 dport) +void sctp_packet_init(struct sctp_packet *packet, + struct sctp_transport *transport, + __u16 sport, __u16 dport) { struct sctp_association *asoc = transport->asoc; size_t overhead; @@ -151,8 +149,6 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->overhead = overhead; sctp_packet_reset(packet); packet->vtag = 0; - - return packet; } /* Free a packet. */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a95915ef9dba..9a223d5b2314 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6032,8 +6032,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, sctp_transport_route(transport, (union sctp_addr *)&chunk->dest, sctp_sk(net->sctp.ctl_sock)); - packet = sctp_packet_init(&transport->packet, transport, sport, dport); - packet = sctp_packet_config(packet, vtag, 0); + packet = &transport->packet; + sctp_packet_init(packet, transport, sport, dport); + sctp_packet_config(packet, vtag, 0); return packet; -- cgit v1.2.3 From 1946e672c173559155a3e210fe95dbf8b7b8ddf7 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 28 Dec 2016 17:52:32 +0800 Subject: ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob Different namespace application might require fast recycling TIME-WAIT sockets independently of the host. Signed-off-by: Haishuang Yan Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 13 +------------ include/net/netns/ipv4.h | 11 +++++++++++ include/net/tcp.h | 1 - net/ipv4/af_inet.c | 2 -- net/ipv4/inet_timewait_sock.c | 3 +-- net/ipv4/proc.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 28 ++++++++++++++-------------- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 12 ++++++++---- net/ipv4/tcp_minisocks.c | 14 +++++--------- net/ipv6/tcp_ipv6.c | 7 ++++--- 12 files changed, 48 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c9b3eb70f340..6a75d67a30fd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -29,16 +29,6 @@ #include -struct inet_hashinfo; - -struct inet_timewait_death_row { - atomic_t tw_count; - - struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; - int sysctl_tw_recycle; - int sysctl_max_tw_buckets; -}; - struct inet_bind_bucket; /* @@ -125,8 +115,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, - struct inet_timewait_death_row *twdr, int family); +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0378e88f6fd3..fffd38453985 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -27,6 +27,16 @@ struct ping_group_range { kgid_t range[2]; }; +struct inet_hashinfo; + +struct inet_timewait_death_row { + atomic_t tw_count; + + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -111,6 +121,7 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int 
sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; + struct inet_timewait_death_row tcp_death_row; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6061963cca98..1da0aa724929 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -231,7 +231,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ extern int sysctl_tcp_timestamps; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f75069883f2b..aae410bb655a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1831,8 +1831,6 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(); - /* Setup TCP slab cache for open requests. */ tcp_init(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ddcd56c08d14..f8aff2c71cde 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, - struct inet_timewait_death_row *twdr, int family) +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { struct inet_timewait_sock *tw; struct sock *sk; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 7143ca1a6af9..0247ca032232 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - atomic_read(&tcp_death_row.tw_count), sockets, + atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/sysctl_net_ipv4.c 
b/net/ipv4/sysctl_net_ipv4.c index 22cbd61079b5..66f8f1b1dc78 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -289,13 +289,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_max_tw_buckets", - .data = &tcp_death_row.sysctl_max_tw_buckets, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, @@ -309,13 +302,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, - { - .procname = "tcp_tw_recycle", - .data = &tcp_death_row.sysctl_tw_recycle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, @@ -960,6 +946,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_tw_buckets", + .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_tw_recycle", + .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4a044964da66..7f0d81c090ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3334,6 +3334,7 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + inet_hashinfo_init(&tcp_hashinfo); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, @@ -3378,7 +3379,6 @@ void __init tcp_init(void) cnt = tcp_hashinfo.ehash_mask + 1; - tcp_death_row.sysctl_max_tw_buckets = cnt / 2; sysctl_tcp_max_orphans = cnt / 2; 
sysctl_max_syn_backlog = max(128, cnt / 256); @@ -3399,6 +3399,7 @@ void __init tcp_init(void) pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); + tcp_v4_init(); tcp_metrics_init(); BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); tcp_tasklet_init(); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c790754ae3e..c61480249835 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6363,7 +6363,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * timewait bucket, so that all the necessary checks * are made in the function processing timewait state. */ - if (tcp_death_row.sysctl_tw_recycle) { + if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { bool strict; dst = af_ops->route_req(sk, &fl, req, &strict); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe9da4fb96bf..56b5f49e3f97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -146,6 +146,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct rtable *rt; int err; struct ip_options_rcu *inet_opt; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -196,7 +197,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) tcp_fetch_timewait_stamp(sk, &rt->dst); @@ -215,7 +216,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. 
*/ tcp_set_state(sk, TCP_SYN_SENT); - err = inet_hash_connect(&tcp_death_row, sk); + err = inet_hash_connect(tcp_death_row, sk); if (err) goto failure; @@ -2457,6 +2458,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_tw_reuse = 0; + net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (tcp_hashinfo.ehash_mask + 1) / 2; + net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + return 0; fail: tcp_sk_exit(net); @@ -2466,7 +2471,7 @@ fail: static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); + inet_twsk_purge(&tcp_hashinfo, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { @@ -2477,7 +2482,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { - inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 28ce5ee831f5..06fde26a82b7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -29,12 +29,6 @@ int sysctl_tcp_abort_on_overflow __read_mostly; -struct inet_timewait_death_row tcp_death_row = { - .sysctl_max_tw_buckets = NR_FILE * 2, - .hashinfo = &tcp_hashinfo, -}; -EXPORT_SYMBOL_GPL(tcp_death_row); - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -100,6 +94,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; + struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { @@ -153,7 +148,7 @@ tcp_timewait_state_process(struct 
inet_timewait_sock *tw, struct sk_buff *skb, tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) inet_twsk_reschedule(tw, tw->tw_timeout); @@ -264,11 +259,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct tcp_sock *tp = tcp_sk(sk); struct inet_timewait_sock *tw; bool recycle_ok = false; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tcp_remember_stamp(sk); - tw = inet_twsk_alloc(sk, &tcp_death_row, state); + tw = inet_twsk_alloc(sk, tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 73bc8fc68acd..a4cdf6a34c30 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -123,6 +123,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct dst_entry *dst; int addr_type; int err; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -258,7 +259,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); - if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); @@ -273,7 +274,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = inet6_hash_connect(&tcp_death_row, sk); + err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; @@ -1948,7 +1949,7 @@ static void __net_exit tcpv6_net_exit(struct net 
*net) static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); + inet_twsk_purge(&tcp_hashinfo, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { -- cgit v1.2.3 From fee83d097b1620530f23bf6063f4ea251ba9c8c7 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 28 Dec 2016 17:52:33 +0800 Subject: ipv4: Namespaceify tcp_max_syn_backlog knob Different namespace application might require different maximal number of remembered connection requests. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/request_sock.h | 4 +--- net/core/request_sock.c | 2 -- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 2 -- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_ipv4.c | 7 +++++-- 7 files changed, 16 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index fffd38453985..8e3f5b6f26d5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,7 @@ struct netns_ipv4 { unsigned int sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; struct inet_timewait_death_row tcp_death_row; + int sysctl_max_syn_backlog; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 6ebe13eb1c4c..a12a5d25b27e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -1,7 +1,7 @@ /* * NET Generic infrastructure for Network protocols. 
* - * Definitions for request_sock + * Definitions for request_sock * * Authors: Arnaldo Carvalho de Melo * @@ -123,8 +123,6 @@ static inline void reqsk_put(struct request_sock *req) reqsk_free(req); } -extern int sysctl_max_syn_backlog; - /* * For a TCP Fast Open listener - * lock - protects the access to all the reqsk, which is co-owned by diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 5d26056b6d8f..9b8727c67b58 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -34,8 +34,6 @@ * and it will increase in proportion to the memory of machine. * Note : Dont forget somaxconn that may limit backlog too. */ -int sysctl_max_syn_backlog = 256; -EXPORT_SYMBOL(sysctl_max_syn_backlog); void reqsk_queue_alloc(struct request_sock_queue *queue) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 66f8f1b1dc78..134d8e191366 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -323,13 +323,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_max_syn_backlog", - .data = &sysctl_max_syn_backlog, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_max_syn_backlog", + .data = &init_net.ipv4.sysctl_max_syn_backlog, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f0d81c090ce..2e3807d8eba8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3378,9 +3378,7 @@ void __init tcp_init(void) cnt = tcp_hashinfo.ehash_mask + 1; - sysctl_tcp_max_orphans = cnt / 2; - sysctl_max_syn_backlog = max(128, cnt / 256); tcp_init_mem(); /* Set per-socket limits to no more than 
1/128 the pressure threshold */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c61480249835..ec6d84363024 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6377,8 +6377,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } /* Kill the following clause, if you dislike this way. */ else if (!net->ipv4.sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, tmp_opt.saw_tstamp)) { /* Without syncookies last quarter of diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 56b5f49e3f97..7e4be4f361f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2419,7 +2419,7 @@ static void __net_exit tcp_sk_exit(struct net *net) static int __net_init tcp_sk_init(struct net *net) { - int res, cpu; + int res, cpu, cnt; net->ipv4.tcp_sk = alloc_percpu(struct sock *); if (!net->ipv4.tcp_sk) @@ -2458,10 +2458,13 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_tw_reuse = 0; + cnt = tcp_hashinfo.ehash_mask + 1; net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; - net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (tcp_hashinfo.ehash_mask + 1) / 2; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); + return 0; fail: tcp_sk_exit(net); -- cgit v1.2.3 From e4781421e883340b796da5a724bda7226817990b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Dec 2016 21:57:02 +0100 Subject: netfilter: merge udp and udplite conntrack helpers udplite was copied from udp, they are virtually 100% identical. This adds udplite tracker to udp instead, removes udplite module, and then makes the udplite tracker builtin. 
udplite will then simply re-use udp timeout settings. It makes little sense to add separate sysctls, nowadays we have fine-grained timeout policy support via the CT target. old: text data bss dec hex filename 1633 672 0 2305 901 nf_conntrack_proto_udp.o 1756 672 0 2428 97c nf_conntrack_proto_udplite.o 69526 17937 268 87731 156b3 nf_conntrack.ko new: text data bss dec hex filename 2442 1184 0 3626 e2a nf_conntrack_proto_udp.o 68565 17721 268 86554 1521a nf_conntrack.ko Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 1 + include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 1 + include/net/netns/conntrack.h | 16 -- net/netfilter/Makefile | 1 - net/netfilter/nf_conntrack_proto_udp.c | 123 ++++++++++ net/netfilter/nf_conntrack_proto_udplite.c | 324 ------------------------- 6 files changed, 125 insertions(+), 341 deletions(-) delete mode 100644 net/netfilter/nf_conntrack_proto_udplite.c (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 919e4e8af327..6ff32815641b 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -14,6 +14,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index eaea968f8657..c59b82456f89 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -5,6 +5,7 @@ extern struct nf_conntrack_l3proto 
nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index cf799fc3fdec..17724c62de97 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,19 +69,6 @@ struct nf_sctp_net { }; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - -struct nf_udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; -#endif - struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -94,9 +81,6 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct nf_udplite_net udplite; -#endif }; struct ct_pcpu { diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca30d1960f1d..bf5c577113b6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,7 +7,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o -nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 20f35ed68030..ae63944c9dc4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -108,6 +108,59 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return 
true; } +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +static int udplite_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + u8 pf, unsigned int hooknum) +{ + unsigned int udplen = skb->len - dataoff; + const struct udphdr *hdr; + struct udphdr _hdr; + unsigned int cscov; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (!hdr) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: short packet "); + return -NF_ACCEPT; + } + + cscov = ntohs(hdr->len); + if (cscov == 0) { + cscov = udplen; + } else if (cscov < sizeof(*hdr) || cscov > udplen) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: invalid checksum coverage "); + return -NF_ACCEPT; + } + + /* UDPLITE mandates checksums */ + if (!hdr->check) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: checksum missing "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. 
*/ + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, + pf)) { + if (LOG_INVALID(net, IPPROTO_UDPLITE)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} +#endif + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, @@ -290,6 +343,41 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); +#endif + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, @@ -322,3 +410,38 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .get_net_proto = udp_get_net_proto, }; 
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +{ + .l3proto = PF_INET6, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .allow_clash = true, + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .packet = udp_packet, + .get_timeouts = udp_get_timeouts, + .new = udp_new, + .error = udplite_error, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, + .nla_policy = nf_ct_port_nla_policy, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, +}; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); +#endif diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c deleted file mode 100644 index c35f7bf05d8c..000000000000 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ /dev/null @@ -1,324 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * (C) 2007 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { - [UDPLITE_CT_UNREPLIED] = 30*HZ, - [UDPLITE_CT_REPLIED] = 180*HZ, -}; - -static inline struct nf_udplite_net *udplite_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.udplite; -} - -static bool udplite_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, - struct nf_conntrack_tuple *tuple) -{ - const struct udphdr *hp; - struct udphdr _hdr; - - /* Actually only need first 4 bytes to get ports. */ - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); - if (hp == NULL) - return false; - - tuple->src.u.udp.port = hp->source; - tuple->dst.u.udp.port = hp->dest; - return true; -} - -static bool udplite_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) -{ - tuple->src.u.udp.port = orig->dst.u.udp.port; - tuple->dst.u.udp.port = orig->src.u.udp.port; - return true; -} - -/* Print out the per-protocol part of the tuple. */ -static void udplite_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - -static unsigned int *udplite_get_timeouts(struct net *net) -{ - return udplite_pernet(net)->timeouts; -} - -/* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *ct, - const struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, - unsigned int *timeouts) -{ - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. 
*/ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_REPLIED]); - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDPLITE_CT_UNREPLIED]); - } - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff, unsigned int *timeouts) -{ - return true; -} - -static int udplite_error(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info *ctinfo, - u_int8_t pf, - unsigned int hooknum) -{ - unsigned int udplen = skb->len - dataoff; - const struct udphdr *hdr; - struct udphdr _hdr; - unsigned int cscov; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); - return -NF_ACCEPT; - } - - cscov = ntohs(hdr->len); - if (cscov == 0) - cscov = udplen; - else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); - return -NF_ACCEPT; - } - - /* UDPLITE mandates checksums */ - if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); - return -NF_ACCEPT; - } - - /* Checksum invalid? Ignore. 
*/ - if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && - nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, - pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); - return -NF_ACCEPT; - } - - return NF_ACCEPT; -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - -#include -#include - -static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], - struct net *net, void *data) -{ - unsigned int *timeouts = data; - struct nf_udplite_net *un = udplite_pernet(net); - - /* set default timeouts for UDPlite. */ - timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; - timeouts[UDPLITE_CT_REPLIED] = un->timeouts[UDPLITE_CT_REPLIED]; - - if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { - timeouts[UDPLITE_CT_UNREPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_UNREPLIED])) * HZ; - } - if (tb[CTA_TIMEOUT_UDPLITE_REPLIED]) { - timeouts[UDPLITE_CT_REPLIED] = - ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_REPLIED])) * HZ; - } - return 0; -} - -static int -udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) -{ - const unsigned int *timeouts = data; - - if (nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, - htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)) || - nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, - htonl(timeouts[UDPLITE_CT_REPLIED] / HZ))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -ENOSPC; -} - -static const struct nla_policy -udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { - [CTA_TIMEOUT_UDPLITE_UNREPLIED] = { .type = NLA_U32 }, - [CTA_TIMEOUT_UDPLITE_REPLIED] = { .type = NLA_U32 }, -}; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - -#ifdef CONFIG_SYSCTL -static struct ctl_table udplite_sysctl_table[] = { - { - .procname = "nf_conntrack_udplite_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_udplite_timeout_stream", 
- .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_udplite_net *un) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(udplite_sysctl_table, - sizeof(udplite_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED]; -#endif - return 0; -} - -static int udplite_init_net(struct net *net, u_int16_t proto) -{ - struct nf_udplite_net *un = udplite_pernet(net); - struct nf_proto_net *pn = &un->pn; - - if (!pn->users) { - int i; - - for (i = 0 ; i < UDPLITE_CT_MAX; i++) - un->timeouts[i] = udplite_timeouts[i]; - } - - return udplite_kmemdup_sysctl_table(pn, un); -} - -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = -{ - .l3proto = PF_INET, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); - 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = -{ - .l3proto = PF_INET6, - .l4proto = IPPROTO_UDPLITE, - .name = "udplite", - .allow_clash = true, - .pkt_to_tuple = udplite_pkt_to_tuple, - .invert_tuple = udplite_invert_tuple, - .print_tuple = udplite_print_tuple, - .packet = udplite_packet, - .get_timeouts = udplite_get_timeouts, - .new = udplite_new, - .error = udplite_error, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - .ctnl_timeout = { - .nlattr_to_obj = udplite_timeout_nlattr_to_obj, - .obj_to_nlattr = udplite_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, - .obj_size = sizeof(unsigned int) * - CTA_TIMEOUT_UDPLITE_MAX, - .nla_policy = udplite_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udplite_init_net, -}; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); -- cgit v1.2.3 From 3db5e3e707ebb9ab0ce3a2dcd924ed2ea525d770 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jan 2017 13:37:28 +0100 Subject: wireless: move IEEE80211_NUM_ACS to ieee80211.h This constant isn't really specific to mac80211, so move it "up" a level to ieee80211.h Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ include/net/mac80211.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe849329511a..87d1937e4671 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -185,6 +185,8 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* number of user priorities 802.11 uses */ #define IEEE80211_NUM_UPS 8 +/* number of ACs */ +#define IEEE80211_NUM_ACS 4 #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ diff --git 
a/include/net/mac80211.h b/include/net/mac80211.h index 5345d358a510..5f5cb194cd78 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -147,7 +147,6 @@ enum ieee80211_ac_numbers { IEEE80211_AC_BE = 2, IEEE80211_AC_BK = 3, }; -#define IEEE80211_NUM_ACS 4 /** * struct ieee80211_tx_queue_params - transmit queue configuration -- cgit v1.2.3 From e691ac2f75b69bee743f0370d79454ba4429b175 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 4 Jan 2017 18:58:31 +0100 Subject: cfg80211: support ieee80211-freq-limit DT property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a helper for reading that new property and applying limitations of supported channels specified this way. It is used with devices that normally support a wide wireless band but in a given config are limited to some part of it (usually due to board design). For example a dual-band chipset may be able to support one band only because of used antennas. It's also common that tri-band routers have separated radios for lower and higher part of 5 GHz band and it may be impossible to say which is which without a DT info. Signed-off-by: Rafał Miłecki [add new function to documentation, fix link] Signed-off-by: Johannes Berg --- Documentation/80211/cfg80211.rst | 3 + include/net/cfg80211.h | 28 ++++++++ net/wireless/Makefile | 1 + net/wireless/of.c | 138 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 170 insertions(+) create mode 100644 net/wireless/of.c (limited to 'include/net') diff --git a/Documentation/80211/cfg80211.rst b/Documentation/80211/cfg80211.rst index b1e149ea6fee..eca534ab6172 100644 --- a/Documentation/80211/cfg80211.rst +++ b/Documentation/80211/cfg80211.rst @@ -44,6 +44,9 @@ Device registration .. kernel-doc:: include/net/cfg80211.h :functions: wiphy_new +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_read_of_freq_limits + .. 
kernel-doc:: include/net/cfg80211.h :functions: wiphy_register diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ca2ac1ce5862..41a9ecd82ca0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -311,6 +311,34 @@ struct ieee80211_supported_band { struct ieee80211_sta_vht_cap vht_cap; }; +/** + * wiphy_read_of_freq_limits - read frequency limits from device tree + * + * @wiphy: the wireless device to get extra limits for + * + * Some devices may have extra limitations specified in DT. This may be useful + * for chipsets that normally support more bands but are limited due to board + * design (e.g. by antennas or external power amplifier). + * + * This function reads info from DT and uses it to *modify* channels (disable + * unavailable ones). It's usually a *bad* idea to use it in drivers with + * shared channel data as DT limitations are device specific. You should make + * sure to call it only if channels in wiphy are copied and can be modified + * without affecting other devices. + * + * As this function accesses the device node, it has to be called after set_wiphy_dev. + * It also modifies channels so they have to be set first. + * If using this helper, call it before wiphy_register().
+ */ +#ifdef CONFIG_OF +void wiphy_read_of_freq_limits(struct wiphy *wiphy); +#else /* CONFIG_OF */ +static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy) +{ +} +#endif /* !CONFIG_OF */ + + /* * Wireless hardware/device configuration structures and methods */ diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 4c9e39f04ef8..95b4c0915412 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o +cfg80211-$(CONFIG_OF) += of.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o diff --git a/net/wireless/of.c b/net/wireless/of.c new file mode 100644 index 000000000000..de221f0edca5 --- /dev/null +++ b/net/wireless/of.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2017 Rafał Miłecki + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include "core.h" + +static bool wiphy_freq_limits_valid_chan(struct wiphy *wiphy, + struct ieee80211_freq_range *freq_limits, + unsigned int n_freq_limits, + struct ieee80211_channel *chan) +{ + u32 bw = MHZ_TO_KHZ(20); + int i; + + for (i = 0; i < n_freq_limits; i++) { + struct ieee80211_freq_range *limit = &freq_limits[i]; + + if (cfg80211_does_bw_fit_range(limit, + MHZ_TO_KHZ(chan->center_freq), + bw)) + return true; + } + + return false; +} + +static void wiphy_freq_limits_apply(struct wiphy *wiphy, + struct ieee80211_freq_range *freq_limits, + unsigned int n_freq_limits) +{ + enum nl80211_band band; + int i; + + if (WARN_ON(!n_freq_limits)) + return; + + for (band = 0; band < NUM_NL80211_BANDS; band++) { + struct ieee80211_supported_band *sband = wiphy->bands[band]; + + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + struct ieee80211_channel *chan = &sband->channels[i]; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + if (!wiphy_freq_limits_valid_chan(wiphy, freq_limits, + n_freq_limits, + chan)) { + pr_debug("Disabling freq %d MHz as it's out of OF limits\n", + chan->center_freq); + chan->flags |= IEEE80211_CHAN_DISABLED; + } + } + } +} + +void wiphy_read_of_freq_limits(struct wiphy *wiphy) +{ + struct device *dev = wiphy_dev(wiphy); + struct device_node *np; + struct property *prop; + struct ieee80211_freq_range *freq_limits; + unsigned int n_freq_limits; + const __be32 *p; + int len, i; + int err = 0; + + if (!dev) + return; + np = dev_of_node(dev); + if (!np) + return; + + prop = of_find_property(np, "ieee80211-freq-limit", &len); + if (!prop) + return; + + if (!len || len % sizeof(u32) || len / sizeof(u32) % 2) { + dev_err(dev, "ieee80211-freq-limit wrong format"); + return; + } + n_freq_limits = len / sizeof(u32) / 2; + + freq_limits = kcalloc(n_freq_limits, sizeof(*freq_limits), GFP_KERNEL); + if (!freq_limits) { + err = -ENOMEM; + goto out_kfree; + } + + p = NULL; + for (i = 0; i < 
n_freq_limits; i++) { + struct ieee80211_freq_range *limit = &freq_limits[i]; + + p = of_prop_next_u32(prop, p, &limit->start_freq_khz); + if (!p) { + err = -EINVAL; + goto out_kfree; + } + + p = of_prop_next_u32(prop, p, &limit->end_freq_khz); + if (!p) { + err = -EINVAL; + goto out_kfree; + } + + if (!limit->start_freq_khz || + !limit->end_freq_khz || + limit->start_freq_khz >= limit->end_freq_khz) { + err = -EINVAL; + goto out_kfree; + } + } + + wiphy_freq_limits_apply(wiphy, freq_limits, n_freq_limits); + +out_kfree: + kfree(freq_limits); + if (err) + dev_err(dev, "Failed to get limits: %d\n", err); +} +EXPORT_SYMBOL(wiphy_read_of_freq_limits); -- cgit v1.2.3 From c7b371e34c0b9ed9e23271608448f89e6d66ba0a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 5 Jan 2017 19:33:59 -0800 Subject: net: ipv4: make fib_select_default static fib_select_default has a single caller within the same file. Make it static. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - net/ipv4/fib_semantics.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5f376af377c7..57c2a863d0b2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -344,7 +344,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -void fib_select_default(const struct flowi4 *flp, struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7a5b4c7d9a87..05c911d21782 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1434,7 +1434,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) } /* Must be invoked inside of an RCU protected region. 
*/ -void fib_select_default(const struct flowi4 *flp, struct fib_result *res) +static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; -- cgit v1.2.3 From a83863174a6137fb3e03f279c9dcdba9e35315d0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 6 Jan 2017 22:18:33 +0800 Subject: sctp: prepare asoc stream for stream reconf sctp stream reconf, described in RFC 6525, needs a structure to save per stream information in assoc, like stream state. In the future, sctp stream scheduler also needs it to save some stream scheduler params and queues. This patchset is to prepare the stream array in assoc for stream reconf. It defines sctp_stream that includes stream arrays inside to replace ssnmap. Note that we use different structures for IN and OUT streams, as the members in per OUT stream will get more and more different from per IN stream. v1->v2: - put these patches into a smaller group. v2->v3: - define sctp_stream to contain stream arrays, and create stream.c to put stream-related functions. - merge 3 patches into 1, as new sctp_stream has the same name with before. Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 1 - include/net/sctp/structs.h | 76 +++++++++++---------------- net/sctp/Makefile | 2 +- net/sctp/associola.c | 13 +++-- net/sctp/objcnt.c | 2 - net/sctp/sm_make_chunk.c | 10 ++-- net/sctp/sm_statefuns.c | 3 +- net/sctp/ssnmap.c | 125 --------------------------------------------- net/sctp/stream.c | 85 ++++++++++++++++++++++++++++++ net/sctp/ulpqueue.c | 36 ++++++------- 10 files changed, 147 insertions(+), 206 deletions(-) delete mode 100644 net/sctp/ssnmap.c create mode 100644 net/sctp/stream.c (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d8833a86cd7e..598d938b0d0a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -283,7 +283,6 @@ extern atomic_t sctp_dbg_objcnt_chunk; extern atomic_t sctp_dbg_objcnt_bind_addr; extern atomic_t sctp_dbg_objcnt_bind_bucket; extern atomic_t sctp_dbg_objcnt_addr; -extern atomic_t sctp_dbg_objcnt_ssnmap; extern atomic_t sctp_dbg_objcnt_datamsg; extern atomic_t sctp_dbg_objcnt_keys; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 87d56cc80a3c..4741ec240caf 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -82,7 +82,6 @@ struct sctp_outq; struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; -struct sctp_ssnmap; struct crypto_shash; @@ -377,54 +376,22 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } __packed sctp_sender_hb_info_t; -/* - * RFC 2960 1.3.2 Sequenced Delivery within Streams - * - * The term "stream" is used in SCTP to refer to a sequence of user - * messages that are to be delivered to the upper-layer protocol in - * order with respect to other messages within the same stream. This is - * in contrast to its usage in TCP, where it refers to a sequence of - * bytes (in this document a byte is assumed to be eight bits). - * ... - * - * This is the structure we use to track both our outbound and inbound - * SSN, or Stream Sequence Numbers. 
- */ - -struct sctp_stream { - __u16 *ssn; - unsigned int len; -}; - -struct sctp_ssnmap { - struct sctp_stream in; - struct sctp_stream out; -}; - -struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - gfp_t gfp); -void sctp_ssnmap_free(struct sctp_ssnmap *map); -void sctp_ssnmap_clear(struct sctp_ssnmap *map); +struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp); +void sctp_stream_free(struct sctp_stream *stream); +void sctp_stream_clear(struct sctp_stream *stream); /* What is the current SSN number for this stream? */ -static inline __u16 sctp_ssn_peek(struct sctp_stream *stream, __u16 id) -{ - return stream->ssn[id]; -} +#define sctp_ssn_peek(stream, type, sid) \ + ((stream)->type[sid].ssn) /* Return the next SSN number for this stream. */ -static inline __u16 sctp_ssn_next(struct sctp_stream *stream, __u16 id) -{ - return stream->ssn[id]++; -} +#define sctp_ssn_next(stream, type, sid) \ + ((stream)->type[sid].ssn++) /* Skip over this ssn and all below. */ -static inline void sctp_ssn_skip(struct sctp_stream *stream, __u16 id, - __u16 ssn) -{ - stream->ssn[id] = ssn+1; -} - +#define sctp_ssn_skip(stream, type, sid, ssn) \ + ((stream)->type[sid].ssn = ssn + 1) + /* * Pointers to address related SCTP functions. * (i.e. things that depend on the address family.) @@ -1331,6 +1298,25 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_out { + __u16 ssn; + __u8 state; +}; + +struct sctp_stream_in { + __u16 ssn; +}; + +struct sctp_stream { + struct sctp_stream_out *out; + struct sctp_stream_in *in; + __u16 outcnt; + __u16 incnt; +}; + +#define SCTP_STREAM_CLOSED 0x00 +#define SCTP_STREAM_OPEN 0x01 + /* SCTP_GET_ASSOC_STATS counters */ struct sctp_priv_assoc_stats { /* Maximum observed rto in the association during subsequent @@ -1746,8 +1732,8 @@ struct sctp_association { /* Default receive parameters */ __u32 default_rcv_context; - /* This tracks outbound ssn for a given stream. 
*/ - struct sctp_ssnmap *ssnmap; + /* Stream arrays */ + struct sctp_stream *stream; /* All outbound chunks go through this structure. */ struct sctp_outq outqueue; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6c4f7496cec6..70f1b570bab9 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -11,7 +11,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o auth.o \ + output.o input.o debug.o stream.o auth.o \ offload.o sctp_probe-y := probe.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d3cc30c25c41..36294f7fb9a7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -358,8 +358,8 @@ void sctp_association_free(struct sctp_association *asoc) sctp_tsnmap_free(&asoc->peer.tsn_map); - /* Free ssnmap storage. */ - sctp_ssnmap_free(asoc->ssnmap); + /* Free stream information. */ + sctp_stream_free(asoc->stream); /* Clean up the bound address list. */ sctp_bind_addr_free(&asoc->base.bind_addr); @@ -1137,7 +1137,7 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Reinitialize SSN for both local streams * and peer's streams. */ - sctp_ssnmap_clear(asoc->ssnmap); + sctp_stream_clear(asoc->stream); /* Flush the ULP reassembly and ordered queue. * Any data there will now be stale and will @@ -1162,10 +1162,9 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (!asoc->ssnmap) { - /* Move the ssnmap. 
*/ - asoc->ssnmap = new->ssnmap; - new->ssnmap = NULL; + if (!asoc->stream) { + asoc->stream = new->stream; + new->stream = NULL; } if (!asoc->assoc_id) { diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 40e7fac96c41..105ac3327b28 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -51,7 +51,6 @@ SCTP_DBG_OBJCNT(bind_addr); SCTP_DBG_OBJCNT(bind_bucket); SCTP_DBG_OBJCNT(chunk); SCTP_DBG_OBJCNT(addr); -SCTP_DBG_OBJCNT(ssnmap); SCTP_DBG_OBJCNT(datamsg); SCTP_DBG_OBJCNT(keys); @@ -67,7 +66,6 @@ static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { SCTP_DBG_OBJCNT_ENTRY(bind_addr), SCTP_DBG_OBJCNT_ENTRY(bind_bucket), SCTP_DBG_OBJCNT_ENTRY(addr), - SCTP_DBG_OBJCNT_ENTRY(ssnmap), SCTP_DBG_OBJCNT_ENTRY(datamsg), SCTP_DBG_OBJCNT_ENTRY(keys), }; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9e9690b7afe1..a15d824a313d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1536,7 +1536,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) /* All fragments will be on the same stream */ sid = ntohs(chunk->subh.data_hdr->stream); - stream = &chunk->asoc->ssnmap->out; + stream = chunk->asoc->stream; /* Now assign the sequence number to the entire message. * All fragments must have the same stream sequence number. 
@@ -1547,9 +1547,9 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) ssn = 0; } else { if (lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG) - ssn = sctp_ssn_next(stream, sid); + ssn = sctp_ssn_next(stream, out, sid); else - ssn = sctp_ssn_peek(stream, sid); + ssn = sctp_ssn_peek(stream, out, sid); } lchunk->subh.data_hdr->ssn = htons(ssn); @@ -2444,9 +2444,9 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, if (!asoc->temp) { int error; - asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams, + asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, asoc->c.sinit_num_ostreams, gfp); - if (!asoc->ssnmap) + if (!asoc->stream) goto clean_up; error = sctp_assoc_set_id(asoc, gfp); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3382ef254e7b..0ceded37d20b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6274,9 +6274,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and is invalid. */ ssn = ntohs(data_hdr->ssn); - if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->ssnmap->in, sid))) { + if (ordered && SSN_lt(ssn, sctp_ssn_peek(asoc->stream, in, sid))) return SCTP_IERROR_PROTO_VIOLATION; - } /* Send the data up to the user. Note: Schedule the * SCTP_CMD_CHUNK_ULP cmd before the SCTP_CMD_GEN_SACK, as the SACK diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c deleted file mode 100644 index b9c8521c1a98..000000000000 --- a/net/sctp/ssnmap.c +++ /dev/null @@ -1,125 +0,0 @@ -/* SCTP kernel implementation - * Copyright (c) 2003 International Business Machines, Corp. - * - * This file is part of the SCTP kernel implementation - * - * These functions manipulate sctp SSN tracker. - * - * This SCTP implementation is free software; - * you can redistribute it and/or modify it under the terms of - * the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This SCTP implementation is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * ************************ - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, see - * . - * - * Please send any bug reports or fixes you make to the - * email address(es): - * lksctp developers - * - * Written or modified by: - * Jon Grimm - */ - -#include -#include -#include -#include - -static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, - __u16 out); - -/* Storage size needed for map includes 2 headers and then the - * specific needs of in or out streams. - */ -static inline size_t sctp_ssnmap_size(__u16 in, __u16 out) -{ - return sizeof(struct sctp_ssnmap) + (in + out) * sizeof(__u16); -} - - -/* Create a new sctp_ssnmap. - * Allocate room to store at least 'len' contiguous TSNs. - */ -struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - gfp_t gfp) -{ - struct sctp_ssnmap *retval; - int size; - - size = sctp_ssnmap_size(in, out); - if (size <= KMALLOC_MAX_SIZE) - retval = kmalloc(size, gfp); - else - retval = (struct sctp_ssnmap *) - __get_free_pages(gfp, get_order(size)); - if (!retval) - goto fail; - - if (!sctp_ssnmap_init(retval, in, out)) - goto fail_map; - - SCTP_DBG_OBJCNT_INC(ssnmap); - - return retval; - -fail_map: - if (size <= KMALLOC_MAX_SIZE) - kfree(retval); - else - free_pages((unsigned long)retval, get_order(size)); -fail: - return NULL; -} - - -/* Initialize a block of memory as a ssnmap. */ -static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, - __u16 out) -{ - memset(map, 0x00, sctp_ssnmap_size(in, out)); - - /* Start 'in' stream just after the map header. 
*/ - map->in.ssn = (__u16 *)&map[1]; - map->in.len = in; - - /* Start 'out' stream just after 'in'. */ - map->out.ssn = &map->in.ssn[in]; - map->out.len = out; - - return map; -} - -/* Clear out the ssnmap streams. */ -void sctp_ssnmap_clear(struct sctp_ssnmap *map) -{ - size_t size; - - size = (map->in.len + map->out.len) * sizeof(__u16); - memset(map->in.ssn, 0x00, size); -} - -/* Dispose of a ssnmap. */ -void sctp_ssnmap_free(struct sctp_ssnmap *map) -{ - int size; - - if (unlikely(!map)) - return; - - size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= KMALLOC_MAX_SIZE) - kfree(map); - else - free_pages((unsigned long)map, get_order(size)); - - SCTP_DBG_OBJCNT_DEC(ssnmap); -} diff --git a/net/sctp/stream.c b/net/sctp/stream.c new file mode 100644 index 000000000000..f86de43cbbe5 --- /dev/null +++ b/net/sctp/stream.c @@ -0,0 +1,85 @@ +/* SCTP kernel implementation + * (C) Copyright IBM Corp. 2001, 2004 + * Copyright (c) 1999-2000 Cisco, Inc. + * Copyright (c) 1999-2001 Motorola, Inc. + * Copyright (c) 2001 Intel Corp. + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream arrays. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * .
+ * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers + * + * Written or modified by: + * Xin Long + */ + +#include + +struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +{ + struct sctp_stream *stream; + int i; + + stream = kzalloc(sizeof(*stream), gfp); + if (!stream) + return NULL; + + stream->outcnt = outcnt; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) { + kfree(stream); + return NULL; + } + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = incnt; + stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); + if (!stream->in) { + kfree(stream->out); + kfree(stream); + return NULL; + } + + return stream; +} + +void sctp_stream_free(struct sctp_stream *stream) +{ + if (unlikely(!stream)) + return; + + kfree(stream->out); + kfree(stream->in); + kfree(stream); +} + +void sctp_stream_clear(struct sctp_stream *stream) +{ + int i; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; +} diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 84d0fdaf7de9..aa3624d50278 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -760,11 +760,11 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; - struct sctp_stream *in; + struct sctp_stream *stream; __u16 sid, csid, cssn; sid = event->stream; - in = &ulpq->asoc->ssnmap->in; + stream = ulpq->asoc->stream; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; @@ -782,11 +782,11 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, if (csid < sid) continue; - if (cssn != sctp_ssn_peek(in, sid)) + if (cssn != sctp_ssn_peek(stream, in, sid)) break; - /* Found it, so mark in the ssnmap. 
*/ - sctp_ssn_next(in, sid); + /* Found it, so mark in the stream. */ + sctp_ssn_next(stream, in, sid); __skb_unlink(pos, &ulpq->lobby); @@ -849,7 +849,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { __u16 sid, ssn; - struct sctp_stream *in; + struct sctp_stream *stream; /* Check if this message needs ordering. */ if (SCTP_DATA_UNORDERED & event->msg_flags) @@ -858,10 +858,10 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, /* Note: The stream ID must be verified before this routine. */ sid = event->stream; ssn = event->ssn; - in = &ulpq->asoc->ssnmap->in; + stream = ulpq->asoc->stream; /* Is this the expected SSN for this stream ID? */ - if (ssn != sctp_ssn_peek(in, sid)) { + if (ssn != sctp_ssn_peek(stream, in, sid)) { /* We've received something out of order, so find where it * needs to be placed. We order by stream and then by SSN. */ @@ -870,7 +870,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, } /* Mark that the next chunk has been found. */ - sctp_ssn_next(in, sid); + sctp_ssn_next(stream, in, sid); /* Go find any other chunks that were waiting for * ordering. @@ -888,12 +888,12 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_ulpevent *event; - struct sctp_stream *in; + struct sctp_stream *stream; struct sk_buff_head temp; struct sk_buff_head *lobby = &ulpq->lobby; __u16 csid, cssn; - in = &ulpq->asoc->ssnmap->in; + stream = ulpq->asoc->stream; /* We are holding the chunks by stream, by SSN. 
*/ skb_queue_head_init(&temp); @@ -912,7 +912,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) continue; /* see if this ssn has been marked by skipping */ - if (!SSN_lt(cssn, sctp_ssn_peek(in, csid))) + if (!SSN_lt(cssn, sctp_ssn_peek(stream, in, csid))) break; __skb_unlink(pos, lobby); @@ -932,8 +932,8 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) csid = cevent->stream; cssn = cevent->ssn; - if (csid == sid && cssn == sctp_ssn_peek(in, csid)) { - sctp_ssn_next(in, csid); + if (csid == sid && cssn == sctp_ssn_peek(stream, in, csid)) { + sctp_ssn_next(stream, in, csid); __skb_unlink(pos, lobby); __skb_queue_tail(&temp, pos); event = sctp_skb2event(pos); @@ -955,17 +955,17 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) */ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) { - struct sctp_stream *in; + struct sctp_stream *stream; /* Note: The stream ID must be verified before this routine. */ - in = &ulpq->asoc->ssnmap->in; + stream = ulpq->asoc->stream; /* Is this an old SSN? If so ignore. */ - if (SSN_lt(ssn, sctp_ssn_peek(in, sid))) + if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid))) return; /* Mark that we are no longer expecting this SSN or lower. */ - sctp_ssn_skip(in, sid, ssn); + sctp_ssn_skip(stream, in, sid, ssn); /* Go find any other chunks that were waiting for * ordering and deliver them if needed. -- cgit v1.2.3 From bc1f44709cf27fb2a5766cadafe7e2ad5e9cb221 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 6 Jan 2017 19:12:52 -0800 Subject: net: make ndo_get_stats64 a void function The network device operation for reading statistics is only called in one place, and it ignores the return value. Having a structure return value is potentially confusing because some future driver could incorrectly assume that the return value was used. Fix all drivers with ndo_get_stats64 to have a void function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 10 ++++------ drivers/net/dummy.c | 5 ++--- drivers/net/ethernet/alacritech/slicoss.c | 6 ++---- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 ++++------ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 6 ++---- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 4 +--- drivers/net/ethernet/atheros/alx/main.c | 6 ++---- drivers/net/ethernet/broadcom/b44.c | 5 ++--- drivers/net/ethernet/broadcom/bnx2.c | 5 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++---- drivers/net/ethernet/broadcom/tg3.c | 8 +++----- drivers/net/ethernet/brocade/bna/bnad.c | 6 ++---- drivers/net/ethernet/calxeda/xgmac.c | 5 ++--- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 5 ++--- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +++---- drivers/net/ethernet/cisco/enic/enic_main.c | 8 +++----- drivers/net/ethernet/ec_bhf.c | 4 +--- drivers/net/ethernet/emulex/benet/be_main.c | 5 ++--- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6 ++---- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 6 ++---- drivers/net/ethernet/ibm/ehea/ehea_main.c | 5 ++--- drivers/net/ethernet/intel/e1000e/e1000.h | 4 ++-- drivers/net/ethernet/intel/e1000e/netdev.c | 5 ++--- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 6 ++---- drivers/net/ethernet/intel/i40e/i40e.h | 5 ++--- drivers/net/ethernet/intel/i40e/i40e_main.c | 18 ++++++------------ drivers/net/ethernet/intel/igb/igb_main.c | 10 ++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++--- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 ++---- drivers/net/ethernet/marvell/mvneta.c | 4 +--- drivers/net/ethernet/marvell/mvpp2.c | 4 +--- drivers/net/ethernet/marvell/sky2.c | 6 ++---- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +-- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +--- 
drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 3 +-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 9 ++++----- drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 +--- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 ++---- drivers/net/ethernet/nvidia/forcedeth.c | 4 +--- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 10 ++++------ drivers/net/ethernet/qlogic/qede/qede_main.c | 7 ++----- drivers/net/ethernet/qualcomm/emac/emac.c | 6 ++---- drivers/net/ethernet/realtek/8139too.c | 9 +++------ drivers/net/ethernet/realtek/r8169.c | 4 +--- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 8 ++------ drivers/net/ethernet/sfc/efx.c | 6 ++---- drivers/net/ethernet/sfc/falcon/efx.c | 6 ++---- drivers/net/ethernet/sun/niu.c | 6 ++---- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 4 +--- drivers/net/ethernet/tile/tilepro.c | 4 ++-- drivers/net/ethernet/via/via-rhine.c | 8 +++----- drivers/net/fjes/fjes_main.c | 7 ++----- drivers/net/hyperv/netvsc_drv.c | 6 ++---- drivers/net/ifb.c | 6 ++---- drivers/net/ipvlan/ipvlan_main.c | 5 ++--- drivers/net/loopback.c | 5 ++--- drivers/net/macsec.c | 8 +++----- drivers/net/macvlan.c | 5 ++--- drivers/net/nlmon.c | 4 +--- drivers/net/ppp/ppp_generic.c | 4 +--- drivers/net/slip/slip.c | 3 +-- drivers/net/team/team.c | 3 +-- drivers/net/tun.c | 3 +-- drivers/net/veth.c | 6 ++---- drivers/net/virtio_net.c | 6 ++---- drivers/net/vmxnet3/vmxnet3_ethtool.c | 4 +--- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- drivers/net/vrf.c | 5 ++--- drivers/net/xen-netfront.c | 6 ++---- drivers/staging/netlogic/xlr_net.c | 10 +--------- include/linux/netdevice.h | 8 ++++---- include/net/ip_tunnels.h | 4 ++-- net/8021q/vlan_dev.c | 5 ++--- net/bridge/br_device.c | 6 ++---- net/ipv4/ip_tunnel_core.c | 6 ++---- net/l2tp/l2tp_eth.c | 6 ++---- net/mac80211/iface.c | 4 +--- net/openvswitch/vport-internal_dev.c | 4 +--- net/sched/sch_teql.c | 5 ++--- 82 files changed, 166 insertions(+), 309 deletions(-) (limited to 'include/net') diff --git 
a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8029dd4912b6..36919221b3f0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -211,8 +211,8 @@ static int lacp_fast; static int bond_init(struct net_device *bond_dev); static void bond_uninit(struct net_device *bond_dev); -static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, - struct rtnl_link_stats64 *stats); +static void bond_get_stats(struct net_device *bond_dev, + struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); @@ -3337,8 +3337,8 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } -static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, - struct rtnl_link_stats64 *stats) +static void bond_get_stats(struct net_device *bond_dev, + struct rtnl_link_stats64 *stats) { struct bonding *bond = netdev_priv(bond_dev); struct rtnl_link_stats64 temp; @@ -3362,8 +3362,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); - - return stats; } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 6421835f11b7..1f2de4e8207c 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -54,8 +54,8 @@ struct pcpu_dstats { struct u64_stats_sync syncp; }; -static struct rtnl_link_stats64 *dummy_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void dummy_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { int i; @@ -73,7 +73,6 @@ static struct rtnl_link_stats64 *dummy_get_stats64(struct net_device *dev, stats->tx_bytes += tbytes; stats->tx_packets += tpackets; } - return stats; } static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct 
net_device *dev) diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index b21d8aa8d653..15a8096c60df 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -1471,8 +1471,8 @@ drop_skb: return NETDEV_TX_OK; } -static struct rtnl_link_stats64 *slic_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *lst) +static void slic_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *lst) { struct slic_device *sdev = netdev_priv(dev); struct slic_stats *stats = &sdev->stats; @@ -1489,8 +1489,6 @@ static struct rtnl_link_stats64 *slic_get_stats(struct net_device *dev, SLIC_GET_STATS_COUNTER(lst->rx_crc_errors, stats, rx_crc); SLIC_GET_STATS_COUNTER(lst->rx_fifo_errors, stats, rx_oflow802); SLIC_GET_STATS_COUNTER(lst->tx_carrier_errors, stats, tx_carrier); - - return lst; } static int slic_get_sset_count(struct net_device *dev, int sset) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index cc8b13ebfa75..aca95b397393 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2165,19 +2165,19 @@ err: ena_com_delete_debug_area(adapter->ena_dev); } -static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void ena_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct ena_adapter *adapter = netdev_priv(netdev); struct ena_admin_basic_stats ena_stats; int rc; if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) - return NULL; + return; rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); if (rc) - return NULL; + return; stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) | ena_stats.tx_bytes_low; @@ -2204,8 +2204,6 @@ static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, stats->rx_errors = 0; stats->tx_errors = 0; - - return stats; } 
static const struct net_device_ops ena_netdev_ops = { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 155190db682d..130de11fa553 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1759,8 +1759,8 @@ static void xgbe_tx_timeout(struct net_device *netdev) schedule_work(&pdata->restart_work); } -static struct rtnl_link_stats64 *xgbe_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *s) +static void xgbe_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *s) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_mmc_stats *pstats = &pdata->mmc_stats; @@ -1786,8 +1786,6 @@ static struct rtnl_link_stats64 *xgbe_get_stats64(struct net_device *netdev, s->tx_dropped = netdev->stats.tx_dropped; DBGPR("<--%s\n", __func__); - - return s; } static int xgbe_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 523b8eff6d7b..45f2204e6695 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1453,7 +1453,7 @@ err: return ret; } -static struct rtnl_link_stats64 *xgene_enet_get_stats64( +static void xgene_enet_get_stats64( struct net_device *ndev, struct rtnl_link_stats64 *storage) { @@ -1484,8 +1484,6 @@ static struct rtnl_link_stats64 *xgene_enet_get_stats64( } } memcpy(storage, stats, sizeof(struct rtnl_link_stats64)); - - return storage; } static int xgene_enet_set_mac_address(struct net_device *ndev, void *addr) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c8f525574d68..c66195d00ed4 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1643,8 +1643,8 @@ static void alx_poll_controller(struct net_device *netdev) } #endif -static struct 
rtnl_link_stats64 *alx_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *net_stats) +static void alx_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *net_stats) { struct alx_priv *alx = netdev_priv(dev); struct alx_hw_stats *hw_stats = &alx->hw.stats; @@ -1688,8 +1688,6 @@ static struct rtnl_link_stats64 *alx_get_stats64(struct net_device *dev, net_stats->rx_packets = hw_stats->rx_ok + net_stats->rx_errors; spin_unlock(&alx->stats_lock); - - return net_stats; } static const struct net_device_ops alx_netdev_ops = { diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 48707ed76ffc..7aef70f7d8ef 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1674,8 +1674,8 @@ static int b44_close(struct net_device *dev) return 0; } -static struct rtnl_link_stats64 *b44_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *nstat) +static void b44_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *nstat) { struct b44 *bp = netdev_priv(dev); struct b44_hw_stats *hwstat = &bp->hw_stats; @@ -1718,7 +1718,6 @@ static struct rtnl_link_stats64 *b44_get_stats64(struct net_device *dev, #endif } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); - return nstat; } static int __b44_load_mcast(struct b44 *bp, struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index d5d1026be4b7..de1d07c08495 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6821,13 +6821,13 @@ bnx2_save_stats(struct bnx2 *bp) (unsigned long) (bp->stats_blk->ctr + \ bp->temp_stats_blk->ctr) -static struct rtnl_link_stats64 * +static void bnx2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) { struct bnx2 *bp = netdev_priv(dev); if (bp->stats_blk == NULL) - return net_stats; + return; net_stats->rx_packets = GET_64BIT_NET_STATS(stat_IfHCInUcastPkts) + @@ 
-6891,7 +6891,6 @@ bnx2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) GET_32BIT_NET_STATS(stat_IfInMBUFDiscards) + GET_32BIT_NET_STATS(stat_FwRxDrop); - return net_stats; } /* All ethtool functions called with rtnl_lock */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 98e948489700..e5f458396e1a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5879,7 +5879,7 @@ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -static struct rtnl_link_stats64 * +static void bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u32 i; @@ -5888,7 +5888,7 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) memset(stats, 0, sizeof(struct rtnl_link_stats64)); if (!bp->bnapi) - return stats; + return; /* TODO check if we need to synchronize with bnxt_close path */ for (i = 0; i < bp->cp_nr_rings; i++) { @@ -5935,8 +5935,6 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns); stats->tx_errors = le64_to_cpu(tx->tx_err); } - - return stats; } static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 185e9e047aa9..800328f562fa 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14142,8 +14142,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .set_link_ksettings = tg3_set_link_ksettings, }; -static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void tg3_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct tg3 *tp = netdev_priv(dev); @@ -14151,13 +14151,11 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, if 
(!tp->hw_stats) { *stats = tp->net_stats_prev; spin_unlock_bh(&tp->lock); - return stats; + return; } tg3_get_nstats(tp, stats); spin_unlock_bh(&tp->lock); - - return stats; } static void tg3_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 112030828c4b..73a94113db1f 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3111,7 +3111,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) * Used spin_lock to synchronize reading of stats structures, which * is written by BNA under the same lock. */ -static struct rtnl_link_stats64 * +static void bnad_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct bnad *bnad = netdev_priv(netdev); @@ -3123,8 +3123,6 @@ bnad_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) bnad_netdev_hwstats_fill(bnad, stats); spin_unlock_irqrestore(&bnad->bna_lock, flags); - - return stats; } static void @@ -3427,7 +3425,7 @@ static const struct net_device_ops bnad_netdev_ops = { .ndo_open = bnad_open, .ndo_stop = bnad_stop, .ndo_start_xmit = bnad_start_xmit, - .ndo_get_stats64 = bnad_get_stats64, + .ndo_get_stats64 = bnad_get_stats64, .ndo_set_rx_mode = bnad_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = bnad_set_mac_address, diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index ce7de6f72512..b0540658afad 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1446,9 +1446,9 @@ static void xgmac_poll_controller(struct net_device *dev) } #endif -static struct rtnl_link_stats64 * +static void xgmac_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) + struct rtnl_link_stats64 *storage) { struct xgmac_priv *priv = netdev_priv(dev); void __iomem *base = priv->base; @@ -1476,7 +1476,6 @@ xgmac_get_stats64(struct 
net_device *dev, writel(0, base + XGMAC_MMC_CTRL); spin_unlock_bh(&priv->stats_lock); - return storage; } static int xgmac_set_mac_address(struct net_device *dev, void *p) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 2006f58b14b1..273eafdb1c57 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1461,8 +1461,8 @@ void nicvf_update_stats(struct nicvf *nic) nicvf_update_sq_stats(nic, qidx); } -static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void nicvf_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct nicvf *nic = netdev_priv(netdev); struct nicvf_hw_stats *hw_stats = &nic->hw_stats; @@ -1478,7 +1478,6 @@ static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, stats->tx_packets = hw_stats->tx_frames; stats->tx_dropped = hw_stats->tx_drops; - return stats; } static void nicvf_tx_timeout(struct net_device *dev) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 629b11879ceb..3349e1f376c3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2375,8 +2375,8 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, } EXPORT_SYMBOL(cxgb4_remove_server_filter); -static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *ns) +static void cxgb_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *ns) { struct port_stats stats; struct port_info *p = netdev_priv(dev); @@ -2389,7 +2389,7 @@ static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, spin_lock(&adapter->stats_lock); if (!netif_device_present(dev)) { spin_unlock(&adapter->stats_lock); - return ns; + return; } 
t4_get_port_stats_offset(adapter, p->tx_chan, &stats, &p->stats_base); @@ -2423,7 +2423,6 @@ static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, ns->tx_errors = stats.tx_error_frames; ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err + ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors; - return ns; } static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index cdd7a1a59aa7..c5842c525eed 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -680,8 +680,8 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, } /* dev_base_lock rwlock held, nominally process context */ -static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, - struct rtnl_link_stats64 *net_stats) +static void enic_get_stats(struct net_device *netdev, + struct rtnl_link_stats64 *net_stats) { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; @@ -693,7 +693,7 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, * recorded stats. 
*/ if (err == -ENOMEM) - return net_stats; + return; net_stats->tx_packets = stats->tx.tx_frames_ok; net_stats->tx_bytes = stats->tx.tx_bytes_ok; @@ -707,8 +707,6 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, net_stats->rx_over_errors = enic->rq_truncated_pkts; net_stats->rx_crc_errors = enic->rq_bad_fcs; net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop; - - return net_stats; } static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr) diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index 7bf78a0d322c..278f139f2a22 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -457,7 +457,7 @@ static int ec_bhf_stop(struct net_device *net_dev) return 0; } -static struct rtnl_link_stats64 * +static void ec_bhf_get_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) { @@ -472,8 +472,6 @@ ec_bhf_get_stats(struct net_device *net_dev, stats->tx_bytes = priv->stat_tx_bytes; stats->rx_bytes = priv->stat_rx_bytes; - - return stats; } static const struct net_device_ops ec_bhf_netdev_ops = { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 225e9a4877d7..0a679d2eaeee 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -639,8 +639,8 @@ void be_parse_stats(struct be_adapter *adapter) } } -static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void be_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct be_adapter *adapter = netdev_priv(netdev); struct be_drv_stats *drvs = &adapter->drv_stats; @@ -704,7 +704,6 @@ static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev, stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop + drvs->rx_input_fifo_overflow_drop + drvs->rx_drops_no_pbuf; - return stats; } void be_link_status_update(struct 
be_adapter *adapter, u8 link_status) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index c9b7ad65e563..b7cbc26a0911 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -313,8 +313,8 @@ static void dpaa_tx_timeout(struct net_device *net_dev) /* Calculates the statistics for the given device by adding the statistics * collected by each CPU. */ -static struct rtnl_link_stats64 *dpaa_get_stats64(struct net_device *net_dev, - struct rtnl_link_stats64 *s) +static void dpaa_get_stats64(struct net_device *net_dev, + struct rtnl_link_stats64 *s) { int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64); struct dpaa_priv *priv = netdev_priv(net_dev); @@ -332,8 +332,6 @@ static struct rtnl_link_stats64 *dpaa_get_stats64(struct net_device *net_dev, for (j = 0; j < numstats; j++) netstats[j] += cpustats[j]; } - - return s; } static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 672b64606321..b7cb61385ad8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1625,8 +1625,8 @@ void hns_nic_set_rx_mode(struct net_device *ndev) netdev_err(ndev, "sync uc address fail\n"); } -struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev, - struct rtnl_link_stats64 *stats) +static void hns_nic_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) { int idx = 0; u64 tx_bytes = 0; @@ -1668,8 +1668,6 @@ struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev, stats->tx_window_errors = ndev->stats.tx_window_errors; stats->rx_compressed = ndev->stats.rx_compressed; stats->tx_compressed = ndev->stats.tx_compressed; - - return stats; } static u16 diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c 
b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 702446a93697..1e53d7a82675 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -328,8 +328,8 @@ out: spin_unlock_irqrestore(&ehea_bcmc_regs.lock, flags); } -static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void ehea_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct ehea_port *port = netdev_priv(dev); u64 rx_packets = 0, tx_packets = 0, rx_bytes = 0, tx_bytes = 0; @@ -352,7 +352,6 @@ static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev, stats->multicast = port->stats.multicast; stats->rx_errors = port->stats.rx_errors; - return stats; } static void ehea_update_stats(struct work_struct *work) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 879cca47b021..a29b12e80855 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -493,8 +493,8 @@ int e1000e_setup_rx_resources(struct e1000_ring *ring); int e1000e_setup_tx_resources(struct e1000_ring *ring); void e1000e_free_rx_resources(struct e1000_ring *ring); void e1000e_free_tx_resources(struct e1000_ring *ring); -struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats); +void e1000e_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats); void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); void e1000e_get_hw_control(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index af3960853a32..723025b317cc 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5920,8 +5920,8 @@ static void e1000_reset_task(struct 
work_struct *work) * * Returns the address of the device statistics structure. **/ -struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +void e1000e_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -5958,7 +5958,6 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, /* Tx Dropped needs to be maintained elsewhere */ spin_unlock(&adapter->stats64_lock); - return stats; } /** diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index bc5ef6eb3dd6..01db688cf539 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1118,8 +1118,8 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface) * Returns 64bit statistics, for use in the ndo_get_stats64 callback. This * function replaces fm10k_get_stats for kernels which support it. 
*/ -static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void fm10k_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_ring *ring; @@ -1164,8 +1164,6 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, /* following stats updated by fm10k_service_task() */ stats->rx_missed_errors = netdev->stats.rx_missed_errors; - - return stats; } int fm10k_setup_tc(struct net_device *dev, u8 tc) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ba8d30984bee..342007df4663 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -834,9 +834,8 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); #ifdef I40E_FCOE -struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( - struct net_device *netdev, - struct rtnl_link_stats64 *storage); +void i40e_get_netdev_stats_struct(struct net_device *netdev, + struct rtnl_link_stats64 *storage); int i40e_set_mac(struct net_device *netdev, void *p); void i40e_set_rx_mode(struct net_device *netdev); #endif diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ad4cf639430e..b2f76d24000d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -409,15 +409,11 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi) * Returns the address of the device statistics structure. * The statistics are actually updated from the service task. 
**/ -#ifdef I40E_FCOE -struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( - struct net_device *netdev, - struct rtnl_link_stats64 *stats) -#else -static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( - struct net_device *netdev, - struct rtnl_link_stats64 *stats) +#ifndef I40E_FCOE +static #endif +void i40e_get_netdev_stats_struct(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_ring *tx_ring, *rx_ring; @@ -426,10 +422,10 @@ static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( int i; if (test_bit(__I40E_DOWN, &vsi->state)) - return stats; + return; if (!vsi->tx_rings) - return stats; + return; rcu_read_lock(); for (i = 0; i < vsi->num_queue_pairs; i++) { @@ -469,8 +465,6 @@ static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( stats->rx_dropped = vsi_stats->rx_dropped; stats->rx_crc_errors = vsi_stats->rx_crc_errors; stats->rx_length_errors = vsi_stats->rx_length_errors; - - return stats; } /** diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 594604e09f8d..7546109d4980 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -137,8 +137,8 @@ static void igb_update_phy_info(unsigned long); static void igb_watchdog(unsigned long); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); +static void igb_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); static void igb_set_uta(struct igb_adapter *adapter, bool set); @@ -5404,8 +5404,8 @@ static void igb_reset_task(struct work_struct *work) * @netdev: network interface device structure * @stats: 
rtnl_link_stats64 pointer **/ -static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void igb_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -5413,8 +5413,6 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, igb_update_stats(adapter, &adapter->stats64); memcpy(stats, &adapter->stats64, sizeof(*stats)); spin_unlock(&adapter->stats64_lock); - - return stats; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0c6eca570791..ffe7d940d9ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8173,8 +8173,9 @@ static void ixgbe_netpoll(struct net_device *netdev) } #endif -static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) + +static void ixgbe_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct ixgbe_adapter *adapter = netdev_priv(netdev); int i; @@ -8212,13 +8213,13 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, } } rcu_read_unlock(); + /* following stats updated by ixgbe_watchdog_task() */ stats->multicast = netdev->stats.multicast; stats->rx_errors = netdev->stats.rx_errors; stats->rx_length_errors = netdev->stats.rx_length_errors; stats->rx_crc_errors = netdev->stats.rx_crc_errors; stats->rx_missed_errors = netdev->stats.rx_missed_errors; - return stats; } #ifdef CONFIG_IXGBE_DCB diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 1a28349114f8..b06863560c7d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3896,8 +3896,8 @@ static void ixgbevf_shutdown(struct pci_dev *pdev) ixgbevf_suspend(pdev, 
PMSG_SUSPEND); } -static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void ixgbevf_get_stats(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); unsigned int start; @@ -3930,8 +3930,6 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, stats->tx_bytes += bytes; stats->tx_packets += packets; } - - return stats; } #define IXGBEVF_MAX_MAC_HDR_LEN 127 diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e05e22705cf7..3607d8febbcf 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -652,7 +652,7 @@ static void mvneta_mib_counters_clear(struct mvneta_port *pp) } /* Get System Network Statistics */ -static struct rtnl_link_stats64 * +static void mvneta_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -686,8 +686,6 @@ mvneta_get_stats64(struct net_device *dev, stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; - - return stats; } /* Rx descriptors helper methods */ diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 4fe430ceb194..69db40e1a4e1 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5739,7 +5739,7 @@ error: return err; } -static struct rtnl_link_stats64 * +static void mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mvpp2_port *port = netdev_priv(dev); @@ -5771,8 +5771,6 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_errors = dev->stats.rx_errors; stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; - - return stats; } static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/marvell/sky2.c 
b/drivers/net/ethernet/marvell/sky2.c index b60ad0e56a9f..18d6336fa162 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -3888,8 +3888,8 @@ static void sky2_set_multicast(struct net_device *dev) gma_write16(hw, port, GM_RX_CTRL, reg); } -static struct rtnl_link_stats64 *sky2_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void sky2_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; @@ -3929,8 +3929,6 @@ static struct rtnl_link_stats64 *sky2_get_stats(struct net_device *dev, stats->rx_dropped = dev->stats.rx_dropped; stats->rx_fifo_errors = dev->stats.rx_fifo_errors; stats->tx_fifo_errors = dev->stats.tx_fifo_errors; - - return stats; } /* Can have one global because blinking is controlled by diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3dd87889e67e..25ae0c5bce3a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -462,8 +462,8 @@ static void mtk_stats_update(struct mtk_eth *eth) } } -static struct rtnl_link_stats64 *mtk_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) +static void mtk_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *storage) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_hw_stats *hw_stats = mac->hw_stats; @@ -494,8 +494,6 @@ static struct rtnl_link_stats64 *mtk_get_stats64(struct net_device *dev, storage->tx_errors = dev->stats.tx_errors; storage->rx_dropped = dev->stats.rx_dropped; storage->tx_dropped = dev->stats.tx_dropped; - - return storage; } static inline int mtk_max_frag_size(int mtu) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edbe200ac2fa..06ef23f040a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1321,7 +1321,7 @@ static void mlx4_en_tx_timeout(struct net_device *dev) } -static struct rtnl_link_stats64 * +static void mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1330,8 +1330,6 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) mlx4_en_fold_software_stats(dev); netdev_stats_to_stats64(stats, &dev->stats); spin_unlock_bh(&priv->stats_lock); - - return stats; } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 88dd731bb8cb..60e5670452a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2686,7 +2686,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +static void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2729,7 +2729,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); - return stats; } static void mlx5e_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 850378893b25..2c864574a9d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -374,13 +374,12 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } -static struct rtnl_link_stats64 * +static void mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); - return stats; } static const struct 
switchdev_ops mlx5e_rep_switchdev_ops = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d768c7b6c6d6..46c53a042e6b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -947,15 +947,13 @@ out: /* Return the stats from a cache that is updated periodically, * as this function might get called in an atomic context. */ -static struct rtnl_link_stats64 * +static void mlxsw_sp_port_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); - - return stats; } int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 150ccf5192a9..696d40612d28 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -381,7 +381,7 @@ static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu) return 0; } -static struct rtnl_link_stats64 * +static void mlxsw_sx_port_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -410,7 +410,6 @@ mlxsw_sx_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; - return stats; } static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name, diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index e506ca876d0d..db297cfce6f4 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -378,8 +378,8 @@ static inline void put_be32(__be32 val, __be32 __iomem * p) __raw_writel((__force __u32) val, (__force void __iomem *)p); } -static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device 
*dev, - struct rtnl_link_stats64 *stats); +static void myri10ge_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats); static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) { @@ -3119,8 +3119,8 @@ drop: return NETDEV_TX_OK; } -static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void myri10ge_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) { const struct myri10ge_priv *mgp = netdev_priv(dev); const struct myri10ge_slice_netstats *slice_stats; @@ -3135,7 +3135,6 @@ static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, stats->rx_dropped += slice_stats->rx_dropped; stats->tx_dropped += slice_stats->tx_dropped; } - return stats; } static void myri10ge_set_multicast_list(struct net_device *dev) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index e07b936f64ec..f364502229db 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3111,7 +3111,7 @@ static int vxge_change_mtu(struct net_device *dev, int new_mtu) * @stats: pointer to struct rtnl_link_stats64 * */ -static struct rtnl_link_stats64 * +static void vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) { struct vxgedev *vdev = netdev_priv(dev); @@ -3150,8 +3150,6 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) net_stats->tx_bytes += bytes; net_stats->tx_errors += txstats->tx_errors; } - - return net_stats; } static enum vxge_hw_status vxge_timestamp_config(struct __vxge_hw_device *devh) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index e8d448109e03..67afd95ffb93 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2638,8 +2638,8 @@ static int 
nfp_net_change_mtu(struct net_device *netdev, int new_mtu) return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL); } -static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void nfp_net_stat64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct nfp_net *nn = netdev_priv(netdev); int r; @@ -2669,8 +2669,6 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, stats->tx_bytes += data[1]; stats->tx_errors += data[2]; } - - return stats; } static bool nfp_net_ebpf_capable(struct nfp_net *nn) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 3913f07279d2..dfc2c8149d22 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1733,7 +1733,7 @@ static void nv_update_stats(struct net_device *dev) * Called with read_lock(&dev_base_lock) held for read - * only synchronized against unregister_netdevice. 
*/ -static struct rtnl_link_stats64* +static void nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) __acquires(&netdev_priv(dev)->hwstats_lock) __releases(&netdev_priv(dev)->hwstats_lock) @@ -1793,8 +1793,6 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) spin_unlock_bh(&np->hwstats_lock); } - - return storage; } /* diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 561fb94c7267..86fb9d3df700 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -90,8 +90,8 @@ static irqreturn_t netxen_msix_intr(int irq, void *data); static void netxen_free_ip_list(struct netxen_adapter *, bool); static void netxen_restore_indev_addr(struct net_device *dev, unsigned long); -static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *stats); +static void netxen_nic_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats); static int netxen_nic_set_mac(struct net_device *netdev, void *p); /* PCI Device ID Table */ @@ -2302,8 +2302,8 @@ request_reset: clear_bit(__NX_RESETTING, &adapter->state); } -static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *netdev, - struct rtnl_link_stats64 *stats) +static void netxen_nic_get_stats(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct netxen_adapter *adapter = netdev_priv(netdev); @@ -2313,8 +2313,6 @@ static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *netdev, stats->tx_bytes = adapter->stats.txbytes; stats->rx_dropped = adapter->stats.rxdropped; stats->tx_dropped = adapter->stats.txdropped; - - return stats; } static irqreturn_t netxen_intr(int irq, void *data) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index b58509feecd5..40a76a1d5973 100644 --- 
a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -398,9 +398,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames; } -static -struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void qede_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct qede_dev *edev = netdev_priv(dev); @@ -430,8 +429,6 @@ struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev, stats->collisions = edev->stats.tx_total_collisions; stats->rx_crc_errors = edev->stats.rx_crc_errors; stats->rx_frame_errors = edev->stats.rx_align_errors; - - return stats; } #ifdef CONFIG_QED_SRIOV diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 422289c232bc..40ebe010b06f 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -312,8 +312,8 @@ static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /* Provide network statistics info for the interface */ -static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *net_stats) +static void emac_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *net_stats) { struct emac_adapter *adpt = netdev_priv(netdev); unsigned int addr = REG_MAC_RX_STATUS_BIN; @@ -377,8 +377,6 @@ static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev, net_stats->tx_window_errors = stats->tx_late_col; spin_unlock(&stats->lock); - - return net_stats; } static const struct net_device_ops emac_netdev_ops = { diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 9bc047ac883b..5ad59c6d29a4 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -653,9 +653,8 @@ static int rtl8139_poll(struct 
napi_struct *napi, int budget); static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance); static int rtl8139_close (struct net_device *dev); static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd); -static struct rtnl_link_stats64 *rtl8139_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 - *stats); +static void rtl8139_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); static void rtl8139_set_rx_mode (struct net_device *dev); static void __set_rx_mode (struct net_device *dev); static void rtl8139_hw_start (struct net_device *dev); @@ -2516,7 +2515,7 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } -static struct rtnl_link_stats64 * +static void rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct rtl8139_private *tp = netdev_priv(dev); @@ -2544,8 +2543,6 @@ rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets = tp->tx_stats.packets; stats->tx_bytes = tp->tx_stats.bytes; } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); - - return stats; } /* Set or clear the multicast filter for this adaptor. 
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 44389c90056a..858f4554de11 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7755,7 +7755,7 @@ err_pm_runtime_put: goto out; } -static struct rtnl_link_stats64 * +static void rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct rtl8169_private *tp = netdev_priv(dev); @@ -7809,8 +7809,6 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) le16_to_cpu(tp->tc_offset.tx_aborted); pm_runtime_put_noidle(&pdev->dev); - - return stats; } static void rtl8169_net_suspend(struct net_device *dev) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index cddcff5a00a7..07074d9bc45d 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1706,11 +1706,9 @@ static inline u64 sxgbe_get_stat64(void __iomem *ioaddr, int reg_lo, int reg_hi) * This function is a driver entry point whenever ifconfig command gets * executed to see device statistics. Statistics are number of * bytes sent or received, errors occurred etc. - * Return value: - * This function returns various statistical information of device. */ -static struct rtnl_link_stats64 *sxgbe_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void sxgbe_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct sxgbe_priv_data *priv = netdev_priv(dev); void __iomem *ioaddr = priv->ioaddr; @@ -1761,8 +1759,6 @@ static struct rtnl_link_stats64 *sxgbe_get_stats64(struct net_device *dev, SXGBE_MMC_TXUFLWHI_GBCNT_REG); writel(0, ioaddr + SXGBE_MMC_CTL_REG); spin_unlock(&priv->stats_lock); - - return stats; } /* sxgbe_set_features - entry point to set offload features of the device. 
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index bbbed2e84de8..ebeecb8fed45 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2219,16 +2219,14 @@ int efx_net_stop(struct net_device *net_dev) } /* Context: process, dev_base_lock or RTNL held, non-blocking. */ -static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev, - struct rtnl_link_stats64 *stats) +static void efx_net_stats(struct net_device *net_dev, + struct rtnl_link_stats64 *stats) { struct efx_nic *efx = netdev_priv(net_dev); spin_lock_bh(&efx->stats_lock); efx->type->update_stats(efx, NULL, stats); spin_unlock_bh(&efx->stats_lock); - - return stats; } /* Context: netif_tx_lock held, BHs disabled. */ diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index ec3ac0e45cc9..8cfbe01e1ddf 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2158,16 +2158,14 @@ int ef4_net_stop(struct net_device *net_dev) } /* Context: process, dev_base_lock or RTNL held, non-blocking. */ -static struct rtnl_link_stats64 *ef4_net_stats(struct net_device *net_dev, - struct rtnl_link_stats64 *stats) +static void ef4_net_stats(struct net_device *net_dev, + struct rtnl_link_stats64 *stats) { struct ef4_nic *efx = netdev_priv(net_dev); spin_lock_bh(&efx->stats_lock); efx->type->update_stats(efx, NULL, stats); spin_unlock_bh(&efx->stats_lock); - - return stats; } /* Context: netif_tx_lock held, BHs disabled. 
*/ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f90d1af6d390..e557a3290a25 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6294,8 +6294,8 @@ no_rings: stats->tx_errors = errors; } -static struct rtnl_link_stats64 *niu_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void niu_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct niu *np = netdev_priv(dev); @@ -6303,8 +6303,6 @@ static struct rtnl_link_stats64 *niu_get_stats(struct net_device *dev, niu_get_rx_stats(np, stats); niu_get_tx_stats(np, stats); } - - return stats; } static void niu_load_hash_xmac(struct niu *np, u16 *hash) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 09f5a67da35e..467dcc53f5e1 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2490,7 +2490,7 @@ static void dwceqos_read_mmc_counters(struct net_local *lp, u32 rx_mask, dwceqos_read(lp, DWC_MMC_RXPACKETCOUNT_GB); } -static struct rtnl_link_stats64* +static void dwceqos_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *s) { unsigned long flags; @@ -2522,8 +2522,6 @@ dwceqos_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *s) else s->tx_errors = hwstats->txunderflowerror + hwstats->txcarriererror; - - return s; } static void diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 0a3b7dafa3ba..30cfea62a356 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -2047,8 +2047,8 @@ static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * * Returns the address of the device statistics structure. 
*/ -static struct rtnl_link_stats64 *tile_net_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void tile_net_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct tile_net_priv *priv = netdev_priv(dev); u64 rx_packets = 0, tx_packets = 0; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 0a6c4e804eed..453a1fad560c 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -513,8 +513,8 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance); static void rhine_tx(struct net_device *dev); static int rhine_rx(struct net_device *dev, int limit); static void rhine_set_rx_mode(struct net_device *dev); -static struct rtnl_link_stats64 *rhine_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); +static void rhine_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct ethtool_ops netdev_ethtool_ops; static int rhine_close(struct net_device *dev); @@ -2221,7 +2221,7 @@ out_unlock: mutex_unlock(&rp->task_lock); } -static struct rtnl_link_stats64 * +static void rhine_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct rhine_private *rp = netdev_priv(dev); @@ -2244,8 +2244,6 @@ rhine_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets = rp->tx_stats.packets; stats->tx_bytes = rp->tx_stats.bytes; } while (u64_stats_fetch_retry_irq(&rp->tx_stats.syncp, start)); - - return stats; } static void rhine_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b77e4ecf3cf2..5028001429c7 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -57,8 +57,7 @@ static void fjes_raise_intr_rxdata_task(struct work_struct *); static void fjes_tx_stall_task(struct work_struct 
*); static void fjes_force_close_task(struct work_struct *); static irqreturn_t fjes_intr(int, void*); -static struct rtnl_link_stats64 * -fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *); +static void fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *); static int fjes_change_mtu(struct net_device *, int); static int fjes_vlan_rx_add_vid(struct net_device *, __be16 proto, u16); static int fjes_vlan_rx_kill_vid(struct net_device *, __be16 proto, u16); @@ -782,14 +781,12 @@ static void fjes_tx_retry(struct net_device *netdev) netif_tx_wake_queue(queue); } -static struct rtnl_link_stats64 * +static void fjes_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct fjes_adapter *adapter = netdev_priv(netdev); memcpy(stats, &adapter->stats64, sizeof(struct rtnl_link_stats64)); - - return stats; } static int fjes_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c9414c054852..05374fce7da4 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -908,8 +908,8 @@ out: return ret; } -static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, - struct rtnl_link_stats64 *t) +static void netvsc_get_stats64(struct net_device *net, + struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); int cpu; @@ -947,8 +947,6 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, t->rx_dropped = net->stats.rx_dropped; t->rx_errors = net->stats.rx_errors; - - return t; } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 66c0eeafcb5d..082534e187fc 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -129,8 +129,8 @@ resched: } -static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void ifb_stats64(struct net_device *dev, 
+ struct rtnl_link_stats64 *stats) { struct ifb_dev_private *dp = netdev_priv(dev); struct ifb_q_private *txp = dp->tx_private; @@ -157,8 +157,6 @@ static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev, } stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; - - return stats; } static int ifb_dev_init(struct net_device *dev) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index ce7ca6a5aa8a..1cdb8c5ec403 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -303,8 +303,8 @@ static void ipvlan_set_multicast_mac_filter(struct net_device *dev) dev_mc_sync(ipvlan->phy_dev, dev); } -static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *s) +static void ipvlan_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) { struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -341,7 +341,6 @@ static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev, s->rx_dropped = rx_errs; s->tx_dropped = tx_drps; } - return s; } static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 1e05b7c2d157..30a493936e63 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -97,8 +97,8 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void loopback_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { u64 bytes = 0; u64 packets = 0; @@ -122,7 +122,6 @@ static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, stats->tx_packets = packets; stats->rx_bytes = bytes; stats->tx_bytes = bytes; - return stats; } static u32 always_on(struct net_device *dev) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 
f83cf6696820..778a77303c49 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2888,13 +2888,13 @@ static int macsec_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static struct rtnl_link_stats64 *macsec_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *s) +static void macsec_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) { int cpu; if (!dev->tstats) - return s; + return; for_each_possible_cpu(cpu) { struct pcpu_sw_netstats *stats; @@ -2918,8 +2918,6 @@ static struct rtnl_link_stats64 *macsec_get_stats64(struct net_device *dev, s->rx_dropped = dev->stats.rx_dropped; s->tx_dropped = dev->stats.tx_dropped; - - return s; } static int macsec_get_iflink(const struct net_device *dev) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 20b3fdf282c5..440ab3d8adf7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -855,8 +855,8 @@ static void macvlan_uninit(struct net_device *dev) macvlan_port_destroy(port->dev); } -static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void macvlan_dev_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -893,7 +893,6 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, stats->rx_dropped = rx_errors; stats->tx_dropped = tx_dropped; } - return stats; } static int macvlan_vlan_rx_add_vid(struct net_device *dev, diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index 2de7faee9b19..b91603835d26 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -58,7 +58,7 @@ static int nlmon_close(struct net_device *dev) return netlink_remove_tap(&nlmon->nt); } -static struct rtnl_link_stats64 * +static void nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { int i; @@ -86,8 +86,6 @@ nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) 
stats->rx_bytes = bytes; stats->tx_bytes = 0; - - return stats; } static u32 always_on(struct net_device *dev) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 3d3b1f4339ef..a411b43a69eb 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1297,7 +1297,7 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -static struct rtnl_link_stats64* +static void ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) { struct ppp *ppp = netdev_priv(dev); @@ -1317,8 +1317,6 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) stats64->rx_dropped = dev->stats.rx_dropped; stats64->tx_dropped = dev->stats.tx_dropped; stats64->rx_length_errors = dev->stats.rx_length_errors; - - return stats64; } static int ppp_dev_init(struct net_device *dev) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 9841f3dc0682..08db4d687533 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -566,7 +566,7 @@ static int sl_change_mtu(struct net_device *dev, int new_mtu) /* Netdevice get statistics request */ -static struct rtnl_link_stats64 * +static void sl_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct net_device_stats *devstats = &dev->stats; @@ -597,7 +597,6 @@ sl_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->collisions += comp->sls_o_misses; } #endif - return stats; } /* Netdevice register callback */ diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index bdc58567d10e..a3711769544b 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1798,7 +1798,7 @@ unwind: return err; } -static struct rtnl_link_stats64 * +static void team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct team *team = netdev_priv(dev); @@ -1835,7 +1835,6 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) 
stats->rx_dropped = rx_dropped; stats->tx_dropped = tx_dropped; stats->rx_nohandler = rx_nohandler; - return stats; } static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cd8e02c94be0..8c1d3bd6b4d0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -953,7 +953,7 @@ static void tun_set_headroom(struct net_device *dev, int new_hr) tun->align = new_hr; } -static struct rtnl_link_stats64 * +static void tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0; @@ -987,7 +987,6 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_dropped = rx_dropped; stats->rx_frame_errors = rx_frame_errors; stats->tx_dropped = tx_dropped; - return stats; } static const struct net_device_ops tun_netdev_ops = { diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0520952aa096..8c39d6d690e5 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -158,8 +158,8 @@ static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) return atomic64_read(&priv->dropped); } -static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void veth_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; @@ -177,8 +177,6 @@ static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev, tot->rx_packets = one.packets; } rcu_read_unlock(); - - return tot; } /* fake multicast ability */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2cea022e6e6e..37db91d1a0a3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1272,8 +1272,8 @@ out: return ret; } -static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void virtnet_stats(struct net_device 
*dev, + struct rtnl_link_stats64 *tot) { struct virtnet_info *vi = netdev_priv(dev); int cpu; @@ -1306,8 +1306,6 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, tot->rx_dropped = dev->stats.rx_dropped; tot->rx_length_errors = dev->stats.rx_length_errors; tot->rx_frame_errors = dev->stats.rx_frame_errors; - - return tot; } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index aabc6ef366b4..f88ffafebfbf 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -113,7 +113,7 @@ vmxnet3_global_stats[] = { }; -struct rtnl_link_stats64 * +void vmxnet3_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -160,8 +160,6 @@ vmxnet3_get_stats64(struct net_device *netdev, stats->rx_dropped += drvRxStats->drop_total; stats->multicast += devRxStats->mcastPktsRxOK; } - - return stats; } static int diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 59e077be8829..ba1c9f93592b 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -465,8 +465,8 @@ vmxnet3_create_queues(struct vmxnet3_adapter *adapter, void vmxnet3_set_ethtool_ops(struct net_device *netdev); -struct rtnl_link_stats64 * -vmxnet3_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +void vmxnet3_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); extern char vmxnet3_driver_name[]; #endif diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 23dfb0eac098..895e3e258543 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -77,8 +77,8 @@ static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) kfree_skb(skb); } -static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void vrf_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { int i; @@ -102,7 
+102,6 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, stats->rx_bytes += rbytes; stats->rx_packets += rpkts; } - return stats; } /* Local traffic destined to local address. Reinsert the packet to rx diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a479cd99911d..40f26b69beb1 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1073,8 +1073,8 @@ static int xennet_change_mtu(struct net_device *dev, int mtu) return 0; } -static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void xennet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { struct netfront_info *np = netdev_priv(dev); int cpu; @@ -1105,8 +1105,6 @@ static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev, tot->rx_errors = dev->stats.rx_errors; tot->tx_dropped = dev->stats.tx_dropped; - - return tot; } static void xennet_release_tx_bufs(struct netfront_queue *queue) diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index fb0928a4fb97..f84069ffa8c6 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -397,14 +397,6 @@ static void xlr_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats) TX_DROP_FRAME_COUNTER); } -static struct rtnl_link_stats64 *xlr_get_stats64(struct net_device *ndev, - struct rtnl_link_stats64 *stats - ) -{ - xlr_stats(ndev, stats); - return stats; -} - static const struct net_device_ops xlr_netdev_ops = { .ndo_open = xlr_net_open, .ndo_stop = xlr_net_stop, @@ -412,7 +404,7 @@ static const struct net_device_ops xlr_netdev_ops = { .ndo_select_queue = xlr_net_select_queue, .ndo_set_mac_address = xlr_net_set_mac_addr, .ndo_set_rx_mode = xlr_set_rx_mode, - .ndo_get_stats64 = xlr_get_stats64, + .ndo_get_stats64 = xlr_stats, }; /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecd78b3c9aba..b14ad9c139d7 100644 
--- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -913,8 +913,8 @@ struct netdev_xdp { * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * - * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, - * struct rtnl_link_stats64 *storage); + * void (*ndo_get_stats64)(struct net_device *dev, + * struct rtnl_link_stats64 *storage); * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. Drivers must do one of the following: @@ -1165,8 +1165,8 @@ struct net_device_ops { struct neigh_parms *); void (*ndo_tx_timeout) (struct net_device *dev); - struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, - struct rtnl_link_stats64 *storage); + void (*ndo_get_stats64)(struct net_device *dev, + struct rtnl_link_stats64 *storage); bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e893fe43dd13..3d4ca4df1209 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -261,8 +261,8 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot); +void ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, __be16 flags, __be32 remote, __be32 local, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 10da6c588bf8..116455ac3db5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -671,7 +671,8 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, return 0; 
} -static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +static void vlan_dev_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct vlan_pcpu_stats *p; u32 rx_errors = 0, tx_dropped = 0; @@ -702,8 +703,6 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st } stats->rx_errors = rx_errors; stats->tx_dropped = tx_dropped; - - return stats; } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ed3b3192fb00..6c46d1b4cdbb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -153,8 +153,8 @@ static int br_dev_stop(struct net_device *dev) return 0; } -static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -178,8 +178,6 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, stats->tx_packets = sum.tx_packets; stats->rx_bytes = sum.rx_bytes; stats->rx_packets = sum.rx_packets; - - return stats; } static int br_change_mtu(struct net_device *dev, int new_mtu) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fed3d29f9eb3..5476110598f7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -188,8 +188,8 @@ int iptunnel_handle_offloads(struct sk_buff *skb, EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); /* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +void ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) { int i; @@ -214,8 +214,6 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, tot->rx_bytes += rx_bytes; tot->tx_bytes += tx_bytes; } - - 
return tot; } EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index e2c6ae024565..8bf18a5f66e0 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -106,8 +106,8 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void l2tp_eth_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct l2tp_eth *priv = netdev_priv(dev); @@ -117,10 +117,8 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev, stats->rx_bytes = atomic_long_read(&priv->rx_bytes); stats->rx_packets = atomic_long_read(&priv->rx_packets); stats->rx_errors = atomic_long_read(&priv->rx_errors); - return stats; } - static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_init = l2tp_eth_dev_init, .ndo_uninit = l2tp_eth_dev_uninit, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 41497b670e2b..77e8a42225f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1122,7 +1122,7 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev, return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } -static struct rtnl_link_stats64 * +static void ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { int i; @@ -1147,8 +1147,6 @@ ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_bytes += rx_bytes; stats->tx_bytes += tx_bytes; } - - return stats; } static const struct net_device_ops ieee80211_dataif_ops = { diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index d5d6caecd072..09141a18ee2d 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -97,7 +97,7 @@ static void internal_dev_destructor(struct net_device *dev) free_netdev(dev); } -static struct 
rtnl_link_stats64 * +static void internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { int i; @@ -125,8 +125,6 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_bytes += local_stats.tx_bytes; stats->tx_packets += local_stats.tx_packets; } - - return stats; } static void internal_set_rx_headroom(struct net_device *dev, int new_hr) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index b0196366d58d..9fe6b427afed 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -401,8 +401,8 @@ static int teql_master_close(struct net_device *dev) return 0; } -static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void teql_master_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct teql_master *m = netdev_priv(dev); @@ -410,7 +410,6 @@ static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, stats->tx_bytes = m->tx_bytes; stats->tx_errors = m->tx_errors; stats->tx_dropped = m->tx_dropped; - return stats; } static int teql_master_mtu(struct net_device *dev, int new_mtu) -- cgit v1.2.3 From e7246e122aaa99ebbb8ad7da80f35a20577bd8af Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:35 -0500 Subject: net-tc: extract skip classify bit from tc_verd Packets sent by the IFB device skip subsequent tc classification. A single bit governs this state. Move it out of tc_verd in anticipation of removing that __u16 completely. The new bitfield tc_skip_classify temporarily uses one bit of a hole, until tc_verd is removed completely in a follow-up patch. Remove the bit hole comment. It could be 2, 3, 4 or 5 bits long. With that many options, little value in documenting it. Introduce a helper function to deduplicate the logic in the two sites that check this bit. 
The field tc_skip_classify is set only in IFB on skbs cloned in act_mirred, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ifb.c | 2 +- include/linux/skbuff.h | 5 ++++- include/net/sch_generic.h | 11 +++++++++++ include/uapi/linux/pkt_cls.h | 6 ------ net/core/dev.c | 10 +++------- net/sched/act_api.c | 11 ++++------- 6 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 082534e187fc..442c4c4a9606 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -81,7 +81,7 @@ static void ifb_ri_tasklet(unsigned long _txp) u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; - skb->tc_verd = SET_TC_NCLS(skb->tc_verd); + skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); txp->tx_packets++; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..570f60ec6cb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -589,6 +589,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @tc_skip_classify: do not classify packet. 
set by IFB device * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -749,7 +750,9 @@ struct sk_buff { #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; #endif - /* 2, 4 or 5 bit hole */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 498f81b229a4..857356f2d74b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -418,6 +418,17 @@ static inline bool skb_at_tc_ingress(const struct sk_buff *skb) #endif } +static inline bool skb_skip_tc_classify(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + if (skb->tc_skip_classify) { + skb->tc_skip_classify = 0; + return true; + } +#endif + return false; +} + /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bba23dbb3ab6..1eed5d7509bc 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,8 +22,6 @@ bit 6,7: Where this packet was last seen 1: on the Ingress 2: on the Egress -bit 8: when set --> Request not to classify on ingress. - * * */ @@ -36,10 +34,6 @@ bit 8: when set --> Request not to classify on ingress. 
#define AT_INGRESS 0x1 #define AT_EGRESS 0x2 -#define TC_NCLS _TC_MAKEMASK1(8) -#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) -#define CLR_TC_NCLS(v) ( v & ~TC_NCLS) - #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) diff --git a/net/core/dev.c b/net/core/dev.c index 56818f7eab2b..e39e35d2e082 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4093,12 +4093,8 @@ another_round: goto out; } -#ifdef CONFIG_NET_CLS_ACT - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - goto ncls; - } -#endif + if (skb_skip_tc_classify(skb)) + goto skip_classify; if (pfmemalloc) goto skip_taps; @@ -4128,8 +4124,8 @@ skip_taps: #endif #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; -ncls: #endif +skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2095c83ce773..f04715a57300 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -426,11 +426,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, { int ret = -1, i; - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - ret = TC_ACT_OK; - goto exec_done; - } + if (skb_skip_tc_classify(skb)) + return TC_ACT_OK; + for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; @@ -439,9 +437,8 @@ repeat: if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) - goto exec_done; + break; } -exec_done: return ret; } EXPORT_SYMBOL(tcf_action_exec); -- cgit v1.2.3 From a5135bcfba7345031df45e02cd150a45add47cf8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:36 -0500 Subject: net-tc: convert tc_verd to integer bitfields Extract the remaining two fields from tc_verd and remove the __u16 completely. TC_AT and TC_FROM are converted to equivalent two-bit integer fields tc_at and tc_from. 
Where possible, use existing helper skb_at_tc_ingress when reading tc_at. Introduce helper skb_reset_tc to clear fields. Not documenting tc_from and tc_at, because they will be replaced with single bit fields in follow-on patches. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ifb.c | 7 +++---- drivers/staging/octeon/ethernet-tx.c | 5 ++--- include/linux/skbuff.h | 6 ++---- include/net/sch_generic.h | 10 +++++++++- include/uapi/linux/pkt_cls.h | 31 ------------------------------- net/core/dev.c | 10 ++++------ net/core/pktgen.c | 4 +--- net/core/skbuff.c | 3 --- net/sched/act_ife.c | 7 +++---- net/sched/act_mirred.c | 9 ++++----- net/sched/sch_netem.c | 2 +- 11 files changed, 29 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 442c4c4a9606..b73b6b6c066b 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -78,9 +78,9 @@ static void ifb_ri_tasklet(unsigned long _txp) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { - u32 from = G_TC_FROM(skb->tc_verd); + u32 from = skb->tc_from; - skb->tc_verd = 0; + skb_reset_tc(skb); skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); @@ -239,7 +239,6 @@ static void ifb_setup(struct net_device *dev) static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ifb_dev_private *dp = netdev_priv(dev); - u32 from = G_TC_FROM(skb->tc_verd); struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb); u64_stats_update_begin(&txp->rsync); @@ -247,7 +246,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) txp->rx_bytes += skb->len; u64_stats_update_end(&txp->rsync); - if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) { + if (skb->tc_from == AT_STACK || !skb->skb_iif) { dev_kfree_skb(skb); dev->stats.rx_dropped++; return NETDEV_TX_OK; diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 6b4c20872323..0b8053205091 100644 
--- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -23,6 +23,7 @@ #endif /* CONFIG_XFRM */ #include <linux/atomic.h> +#include <net/sch_generic.h> #include <asm/octeon/octeon.h> @@ -369,9 +370,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_NET_SCHED skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif /* CONFIG_NET_CLS_ACT */ + skb_reset_tc(skb); #endif /* CONFIG_NET_SCHED */ #endif /* REUSE_SKBUFFS_WITHOUT_FREE */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 570f60ec6cb4..f738d09947b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -599,7 +599,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index - * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue @@ -752,13 +751,12 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; + __u8 tc_at:2; + __u8 tc_from:2; #endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif #endif union { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 857356f2d74b..f80dba516964 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -409,10 +409,18 @@ bool tcf_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); +static inline void skb_reset_tc(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_at = 0; + skb->tc_from = 0; +#endif +} + static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - return G_TC_AT(skb->tc_verd) & AT_INGRESS; + return skb->tc_at & AT_INGRESS; #else return false; #endif diff --git
a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 1eed5d7509bc..cee753a7a40c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -5,40 +5,9 @@ #include <linux/pkt_sched.h> #ifdef __KERNEL__ -/* I think i could have done better macros ; for now this is stolen from - * some arch/mips code - jhs -*/ -#define _TC_MAKE32(x) ((x)) - -#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n)) -#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n)) -#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n)) -#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n)) - -/* verdict bit breakdown - * -bit 6,7: Where this packet was last seen -0: Above the transmit example at the socket level -1: on the Ingress -2: on the Egress - - * - * */ - -#define S_TC_FROM _TC_MAKE32(6) -#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) -#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) -#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM) -#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM)) #define AT_STACK 0x0 #define AT_INGRESS 0x1 #define AT_EGRESS 0x2 - -#define S_TC_AT _TC_MAKE32(12) -#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) -#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) -#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) -#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) #endif /* Action attributes */ diff --git a/net/core/dev.c b/net/core/dev.c index e39e35d2e082..8b5d6d033473 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3153,7 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (!cl) return skb; - /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set + /* skb->tc_at and qdisc_skb_cb(skb)->pkt_len were already set * earlier by the caller.
*/ qdisc_bstats_cpu_update(cl->q, skb); @@ -3320,7 +3320,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); + skb->tc_at = AT_EGRESS; # ifdef CONFIG_NET_EGRESS if (static_key_false(&egress_needed)) { skb = sch_handle_egress(skb, &rc, dev); @@ -3920,7 +3920,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); + skb->tc_at = AT_INGRESS; qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { @@ -4122,9 +4122,7 @@ skip_taps: goto out; } #endif -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif + skb_reset_tc(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8e69ce472236..96947f5d41e4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* skb was 'freed' by stack, so clean few * bits and reuse it */ -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; /* reset reclass/redir ttl */ -#endif + skb_reset_tc(skb); } while (--burst > 0); goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a03730fbc1a..adec4bf807d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -878,9 +878,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); -#ifdef CONFIG_NET_CLS_ACT - CHECK_SKB_FIELD(tc_verd); -#endif #endif } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 80b848d3f096..921fb20eaa7c 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -736,12 +736,11 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, u16 metalen 
= ife_get_sz(skb, ife); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; unsigned int skboff = skb->dev->hard_header_len; - u32 at = G_TC_AT(skb->tc_verd); int new_len = skb->len + hdrm; bool exceed_mtu = false; int err; - if (at & AT_EGRESS) { + if (!skb_at_tc_ingress(skb)) { if (new_len > skb->dev->mtu) exceed_mtu = true; } @@ -773,7 +772,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); iethh = (struct ethhdr *)skb->data; @@ -816,7 +815,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, ether_addr_copy(oethh->h_dest, iethh->h_dest); oethh->h_proto = htons(ife->eth_type); - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); spin_unlock(&ife->tcf_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d9fa6e0a1b4..8543279bba49 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -170,7 +170,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, int retval, err = 0; int m_eaction; int mac_len; - u32 at; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); @@ -191,7 +190,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, goto out; } - at = G_TC_AT(skb->tc_verd); skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) goto out; @@ -200,8 +198,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, * and devices expect a mac header on xmit, then mac push/pull is * needed. 
*/ - if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) { - if (at & AT_EGRESS) { + if (skb->tc_at != tcf_mirred_act_direction(m_eaction) && + m_mac_header_xmit) { + if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ mac_len = skb_network_header(skb) - skb_mac_header(skb); skb_pull_rcsum(skb2, mac_len); @@ -213,7 +212,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, /* mirror is always swallowed */ if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); + skb2->tc_from = skb2->tc_at; skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bcfadfdea8e0..bb5c638b6852 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -626,7 +626,7 @@ deliver: * If it's at ingress let's pretend the delay is * from the network (tstamp will be updated). */ - if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + if (skb->tc_from & AT_INGRESS) skb->tstamp = 0; #endif -- cgit v1.2.3 From 8dc07fdbf2054f157e8333f940a1ad728916c786 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:37 -0500 Subject: net-tc: convert tc_at to tc_at_ingress Field tc_at is used only within tc actions to distinguish ingress from egress processing. A single bit is sufficient for this purpose. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- include/net/sch_generic.h | 3 +-- net/core/dev.c | 8 +++----- net/sched/act_mirred.c | 12 ++++++------ 4 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f738d09947b2..fab3f87e9bd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -590,6 +590,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. 
set by IFB device + * @tc_at_ingress: used within tc_classify to distinguish in/egress * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -751,7 +752,7 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; - __u8 tc_at:2; + __u8 tc_at_ingress:1; __u8 tc_from:2; #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f80dba516964..4bd6d5387209 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -412,7 +412,6 @@ int skb_do_redirect(struct sk_buff *); static inline void skb_reset_tc(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - skb->tc_at = 0; skb->tc_from = 0; #endif } @@ -420,7 +419,7 @@ static inline void skb_reset_tc(struct sk_buff *skb) static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - return skb->tc_at & AT_INGRESS; + return skb->tc_at_ingress; #else return false; #endif diff --git a/net/core/dev.c b/net/core/dev.c index 8b5d6d033473..c143f1391117 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3153,9 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (!cl) return skb; - /* skb->tc_at and qdisc_skb_cb(skb)->pkt_len were already set - * earlier by the caller. - */ + /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. 
*/ qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { @@ -3320,7 +3318,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT - skb->tc_at = AT_EGRESS; + skb->tc_at_ingress = 0; # ifdef CONFIG_NET_EGRESS if (static_key_false(&egress_needed)) { skb = sch_handle_egress(skb, &rc, dev); @@ -3920,7 +3918,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - skb->tc_at = AT_INGRESS; + skb->tc_at_ingress = 1; qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8543279bba49..e832c62fd705 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -39,15 +39,15 @@ static bool tcf_mirred_is_act_redirect(int action) return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } -static u32 tcf_mirred_act_direction(int action) +static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: - return AT_EGRESS; + return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: - return AT_INGRESS; + return true; default: BUG(); } @@ -198,7 +198,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, * and devices expect a mac header on xmit, then mac push/pull is * needed. */ - if (skb->tc_at != tcf_mirred_act_direction(m_eaction) && + if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) && m_mac_header_xmit) { if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ @@ -212,11 +212,11 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, /* mirror is always swallowed */ if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_from = skb2->tc_at; + skb2->tc_from = skb_at_tc_ingress(skb) ? 
AT_INGRESS : AT_EGRESS; skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS) + if (!tcf_mirred_act_wants_ingress(m_eaction)) err = dev_queue_xmit(skb2); else err = netif_receive_skb(skb2); -- cgit v1.2.3 From bc31c905e946b5c55df5d2938335e78ffb3157ca Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:38 -0500 Subject: net-tc: convert tc_from to tc_from_ingress and tc_redirected The tc_from field fulfills two roles. It encodes whether a packet was redirected by an act_mirred device and, if so, whether act_mirred was called on ingress or egress. Split it into separate fields. The information is needed by the special IFB loop, where packets are taken out of the normal path by act_mirred, forwarded to IFB, then reinjected at their original location (ingress or egress) by IFB. The IFB device cannot use skb->tc_at_ingress, because that may have been overwritten as the packet travels from act_mirred to ifb_xmit, when it passes through tc_classify on the IFB egress path. Cache this value in skb->tc_from_ingress. That field is valid only if a packet arriving at ifb_xmit came from act_mirred. Other packets can be crafted to reach ifb_xmit. These must be dropped. Set tc_redirected on redirection and drop all packets that do not have this bit set. Both fields are set only on cloned skbs in tc actions, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- drivers/net/ifb.c | 13 +++++-------- include/linux/skbuff.h | 5 ++++- include/net/sch_generic.h | 2 +- include/uapi/linux/pkt_cls.h | 6 ------ net/sched/act_mirred.c | 6 ++++-- net/sched/sch_netem.c | 2 +- 6 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index b73b6b6c066b..312fce7302d3 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -78,9 +78,7 @@ static void ifb_ri_tasklet(unsigned long _txp) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { - u32 from = skb->tc_from; - - skb_reset_tc(skb); + skb->tc_redirected = 0; skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); @@ -101,13 +99,12 @@ static void ifb_ri_tasklet(unsigned long _txp) rcu_read_unlock(); skb->skb_iif = txp->dev->ifindex; - if (from & AT_EGRESS) { + if (!skb->tc_from_ingress) { dev_queue_xmit(skb); - } else if (from & AT_INGRESS) { + } else { skb_pull(skb, skb->mac_len); netif_receive_skb(skb); - } else - BUG(); + } } if (__netif_tx_trylock(txq)) { @@ -246,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) txp->rx_bytes += skb->len; u64_stats_update_end(&txp->rsync); - if (skb->tc_from == AT_STACK || !skb->skb_iif) { + if (!skb->tc_redirected || !skb->skb_iif) { dev_kfree_skb(skb); dev->stats.rx_dropped++; return NETDEV_TX_OK; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fab3f87e9bd1..3149a88de548 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -591,6 +591,8 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. 
set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress + * @tc_redirected: packet was redirected by a tc action + * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -753,7 +755,8 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_from:2; + __u8 tc_redirected:1; + __u8 tc_from_ingress:1; #endif #ifdef CONFIG_NET_SCHED diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4bd6d5387209..e2f426f6d62f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -412,7 +412,7 @@ int skb_do_redirect(struct sk_buff *); static inline void skb_reset_tc(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - skb->tc_from = 0; + skb->tc_redirected = 0; #endif } diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cee753a7a40c..a081efbd61a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,12 +4,6 @@ #include #include -#ifdef __KERNEL__ -#define AT_STACK 0x0 -#define AT_INGRESS 0x1 -#define AT_EGRESS 0x2 -#endif - /* Action attributes */ enum { TCA_ACT_UNSPEC, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e832c62fd705..84682f02b611 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -211,8 +211,10 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } /* mirror is always swallowed */ - if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_from = skb_at_tc_ingress(skb) ? 
AT_INGRESS : AT_EGRESS; + if (tcf_mirred_is_act_redirect(m_eaction)) { + skb2->tc_redirected = 1; + skb2->tc_from_ingress = skb2->tc_at_ingress; + } skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bb5c638b6852..c8bb62a1e744 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -626,7 +626,7 @@ deliver: * If it's at ingress let's pretend the delay is * from the network (tstamp will be updated). */ - if (skb->tc_from & AT_INGRESS) + if (skb->tc_redirected && skb->tc_from_ingress) skb->tstamp = 0; #endif -- cgit v1.2.3 From bd2522b168847106c1885f0319a2833bdf88bf9a Mon Sep 17 00:00:00 2001 From: Andrzej Zaborowski Date: Fri, 6 Jan 2017 16:33:43 -0500 Subject: cfg80211: NL80211_ATTR_SOCKET_OWNER support for CMD_CONNECT Disconnect or deauthenticate when the owning socket is closed if this flag is supplied to CMD_CONNECT or CMD_ASSOCIATE. This may be used to ensure userspace daemon doesn't leave an unmanaged connection behind. In some situations it would be possible to account for that, to some degree, in the daemon restart code or in the up/down scripts without the use of this attribute. But there will be systems where the daemon can go away for varying periods without a warning due to local resource management.
Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++++ include/uapi/linux/nl80211.h | 2 ++ net/wireless/core.c | 3 +++ net/wireless/core.h | 1 + net/wireless/mlme.c | 5 +++++ net/wireless/nl80211.c | 26 +++++++++++++++++++++++++- net/wireless/sme.c | 33 +++++++++++++++++++++++++++++++++ 7 files changed, 76 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 41a9ecd82ca0..cb13789ebaef 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3865,6 +3865,9 @@ struct cfg80211_cached_keys; * @conn: (private) cfg80211 software SME connection state machine data * @connect_keys: (private) keys to set after connection is established * @conn_bss_type: connecting/connected BSS type + * @conn_owner_nlportid: (private) connection owner socket port ID + * @disconnect_wk: (private) auto-disconnect work + * @disconnect_bssid: (private) the BSSID to use for auto-disconnect * @ibss_fixed: (private) IBSS is using fixed BSSID * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing @@ -3896,6 +3899,10 @@ struct wireless_dev { struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; + u32 conn_owner_nlportid; + + struct work_struct disconnect_wk; + u8 disconnect_bssid[ETH_ALEN]; struct list_head event_list; spinlock_t event_lock; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d74e10b1246a..174f4b30e804 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1820,6 +1820,8 @@ enum nl80211_commands { * and remove functions. NAN notifications will be sent in unicast to that * socket. Without this attribute, any socket can add functions and the * notifications will be sent to the %NL80211_MCGRP_NAN multicast group. 
+ * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the + * station will deauthenticate when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. diff --git a/net/wireless/core.c b/net/wireless/core.c index 158c59ecf90a..903fc419217a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1142,6 +1142,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) dev->priv_flags |= IFF_DONT_BRIDGE; + INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); break; case NETDEV_GOING_DOWN: @@ -1230,6 +1232,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, #ifdef CONFIG_CFG80211_WEXT kzfree(wdev->wext.keys); #endif + flush_work(&wdev->disconnect_wk); } /* * synchronise (so that we won't find this netdev diff --git a/net/wireless/core.h b/net/wireless/core.h index bc8ba6e57519..ba42055a036d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -400,6 +400,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *resp_ie, size_t resp_ie_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_autodisconnect_wk(struct work_struct *work); /* SME implementation */ void cfg80211_conn_work(struct work_struct *work); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 4646cf5695b9..1c63a77aea34 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -345,6 +345,11 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return 0; + if (ether_addr_equal(wdev->disconnect_bssid, bssid) || + (wdev->current_bss && + ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + wdev->conn_owner_nlportid = 0; + return rdev_deauth(rdev, dev, &req); } diff --git 
a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fed33ec20a71..b378d0a04003 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8050,8 +8050,17 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req); + + if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + dev->ieee80211_ptr->conn_owner_nlportid = + info->snd_portid; + memcpy(dev->ieee80211_ptr->disconnect_bssid, + bssid, ETH_ALEN); + } + wdev_unlock(dev->ieee80211_ptr); } @@ -8770,11 +8779,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } wdev_lock(dev->ieee80211_ptr); + err = cfg80211_connect(rdev, dev, &connect, connkeys, connect.prev_bssid); - wdev_unlock(dev->ieee80211_ptr); if (err) kzfree(connkeys); + + if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; + if (connect.bssid) + memcpy(dev->ieee80211_ptr->disconnect_bssid, + connect.bssid, ETH_ALEN); + else + memset(dev->ieee80211_ptr->disconnect_bssid, + 0, ETH_ALEN); + } + + wdev_unlock(dev->ieee80211_ptr); + return err; } @@ -14491,6 +14513,8 @@ static int nl80211_netlink_notify(struct notifier_block * nb, if (wdev->owner_nlportid == notify->portid) schedule_destroy_work = true; + else if (wdev->conn_owner_nlportid == notify->portid) + schedule_work(&wdev->disconnect_wk); } spin_lock_bh(&rdev->beacon_registrations_lock); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 5e0d19380302..46693913fcea 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -727,6 +727,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kzfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; + wdev->conn_owner_nlportid = 0; if (bss) { cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); 
@@ -955,6 +956,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->current_bss = NULL; wdev->ssid_len = 0; + wdev->conn_owner_nlportid = 0; nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); @@ -1098,6 +1100,8 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, kzfree(wdev->connect_keys); wdev->connect_keys = NULL; + wdev->conn_owner_nlportid = 0; + if (wdev->conn) err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) @@ -1107,3 +1111,32 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, return err; } + +/* + * Used to clean up after the connection / connection attempt owner socket + * disconnects + */ +void cfg80211_autodisconnect_wk(struct work_struct *work) +{ + struct wireless_dev *wdev = + container_of(work, struct wireless_dev, disconnect_wk); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + + wdev_lock(wdev); + + if (wdev->conn_owner_nlportid) { + /* + * Use disconnect_bssid if still connecting and ops->disconnect + * not implemented. Otherwise we can use cfg80211_disconnect. + */ + if (rdev->ops->disconnect || wdev->current_bss) + cfg80211_disconnect(rdev, wdev->netdev, + WLAN_REASON_DEAUTH_LEAVING, true); + else + cfg80211_mlme_deauth(rdev, wdev->netdev, + wdev->disconnect_bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + } + + wdev_unlock(wdev); +} -- cgit v1.2.3 From ab3d408d3f40f939d46a32b1c24aa2833a13b846 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 8 Jan 2017 14:52:07 -0800 Subject: net: dsa: Encapsulate legacy switch drivers into dsa_switch_driver In preparation for making struct dsa_switch_ops const, encapsulate it within a dsa_switch_driver which has a list pointer and a pointer to dsa_switch_ops. This allows us to take the list_head pointer out of dsa_switch_ops, which is written to by {un,}register_switch_driver. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6060.c | 8 ++++++-- drivers/net/dsa/mv88e6xxx/chip.c | 8 ++++++-- include/net/dsa.h | 11 +++++++---- net/dsa/dsa.c | 12 +++++++----- 4 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 7ce36dbd9b62..bcbd6dcbd8e8 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -261,16 +261,20 @@ static struct dsa_switch_ops mv88e6060_switch_ops = { .phy_write = mv88e6060_phy_write, }; +static struct dsa_switch_driver mv88e6060_switch_drv = { + .ops = &mv88e6060_switch_ops, +}; + static int __init mv88e6060_init(void) { - register_switch_driver(&mv88e6060_switch_ops); + register_switch_driver(&mv88e6060_switch_drv); return 0; } module_init(mv88e6060_init); static void __exit mv88e6060_cleanup(void) { - unregister_switch_driver(&mv88e6060_switch_ops); + unregister_switch_driver(&mv88e6060_switch_drv); } module_exit(mv88e6060_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 676b0e2ad221..d43d12c281b3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4403,6 +4403,10 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_mdb_dump = mv88e6xxx_port_mdb_dump, }; +static struct dsa_switch_driver mv88e6xxx_switch_drv = { + .ops = &mv88e6xxx_switch_ops, +}; + static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, struct device_node *np) { @@ -4565,7 +4569,7 @@ static struct mdio_driver mv88e6xxx_driver = { static int __init mv88e6xxx_init(void) { - register_switch_driver(&mv88e6xxx_switch_ops); + register_switch_driver(&mv88e6xxx_switch_drv); return mdio_driver_register(&mv88e6xxx_driver); } module_init(mv88e6xxx_init); @@ -4573,7 +4577,7 @@ module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { mdio_driver_unregister(&mv88e6xxx_driver); - unregister_switch_driver(&mv88e6xxx_switch_ops); + 
unregister_switch_driver(&mv88e6xxx_switch_drv); } module_exit(mv88e6xxx_cleanup); diff --git a/include/net/dsa.h b/include/net/dsa.h index b122196d5a1f..edfa9b130953 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -240,8 +240,6 @@ struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; struct dsa_switch_ops { - struct list_head list; - /* * Probing and setup. */ @@ -390,8 +388,13 @@ struct dsa_switch_ops { int (*cb)(struct switchdev_obj *obj)); }; -void register_switch_driver(struct dsa_switch_ops *type); -void unregister_switch_driver(struct dsa_switch_ops *type); +struct dsa_switch_driver { + struct list_head list; + struct dsa_switch_ops *ops; +}; + +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index cda787ebad15..4e7bc57cdae5 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -60,18 +60,18 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_ops *ops) +void register_switch_driver(struct dsa_switch_driver *drv) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&ops->list, &dsa_switch_drivers); + list_add_tail(&drv->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_ops *ops) +void unregister_switch_driver(struct dsa_switch_driver *drv) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&ops->list); + list_del_init(&drv->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); @@ -90,8 +90,10 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, 
mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { struct dsa_switch_ops *ops; + struct dsa_switch_driver *drv; - ops = list_entry(list, struct dsa_switch_ops, list); + drv = list_entry(list, struct dsa_switch_driver, list); + ops = drv->ops; name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { -- cgit v1.2.3 From a82f67afe8e297834bedafa529941d9d0808caf8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 8 Jan 2017 14:52:08 -0800 Subject: net: dsa: Make dsa_switch_ops const Now that we have properly encapsulated and made drivers utilize exported functions, we can switch dsa_switch_ops to be annotated with const. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- drivers/net/dsa/bcm_sf2.c | 2 +- drivers/net/dsa/mv88e6060.c | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/qca8k.c | 2 +- include/net/dsa.h | 4 ++-- net/dsa/dsa.c | 10 +++++----- net/dsa/hwmon.c | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a448661b55c6..5102a3701a1a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1453,7 +1453,7 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_NONE; } -static struct dsa_switch_ops b53_switch_ops = { +static const struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .get_strings = b53_get_strings, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 52027718d06f..31d017086f8b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -977,7 +977,7 @@ static struct b53_io_ops bcm_sf2_io_ops = { .write64 = bcm_sf2_core_write64, }; -static struct dsa_switch_ops bcm_sf2_ops = { +static const struct dsa_switch_ops bcm_sf2_ops = { .get_tag_protocol = 
bcm_sf2_sw_get_tag_protocol, .setup = bcm_sf2_sw_setup, .get_strings = b53_get_strings, diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index bcbd6dcbd8e8..5934b7a4c448 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -252,7 +252,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) return reg_write(ds, addr, regnum, val); } -static struct dsa_switch_ops mv88e6060_switch_ops = { +static const struct dsa_switch_ops mv88e6060_switch_ops = { .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d43d12c281b3..eea8e0176e33 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4361,7 +4361,7 @@ static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port, return err; } -static struct dsa_switch_ops mv88e6xxx_switch_ops = { +static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index b3df70d07ff6..54d270d59eb0 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -911,7 +911,7 @@ qca8k_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_QCA; } -static struct dsa_switch_ops qca8k_switch_ops = { +static const struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, .get_strings = qca8k_get_strings, diff --git a/include/net/dsa.h b/include/net/dsa.h index edfa9b130953..b94d1f2ef912 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -169,7 +169,7 @@ struct dsa_switch { /* * The switch operations. 
*/ - struct dsa_switch_ops *ops; + const struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -390,7 +390,7 @@ struct dsa_switch_ops { struct dsa_switch_driver { struct list_head list; - struct dsa_switch_ops *ops; + const struct dsa_switch_ops *ops; }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 4e7bc57cdae5..fd532487dfdf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -76,11 +76,11 @@ void unregister_switch_driver(struct dsa_switch_driver *drv) } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_ops * +static const struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_ops *ret; + const struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -89,7 +89,7 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_ops *ops; + const struct dsa_switch_ops *ops; struct dsa_switch_driver *drv; drv = list_entry(list, struct dsa_switch_driver, list); @@ -207,7 +207,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_ops *ops = ds->ops; + const struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -326,7 +326,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_ops *ops; + const struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; diff --git a/net/dsa/hwmon.c b/net/dsa/hwmon.c index 3a9cdf0b22b8..08831a811278 100644 --- a/net/dsa/hwmon.c +++ 
b/net/dsa/hwmon.c @@ -86,7 +86,7 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_ops *ops = ds->ops; + const struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { -- cgit v1.2.3 From 4b9d07a44015a0e940448fa3885b894349e8b162 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Jan 2017 16:55:12 +0100 Subject: net: introduce keepalive function in struct proto Direct call of tcp_set_keepalive() function from protocol-agnostic sock_setsockopt() function in net/core/sock.c violates network layering. And newly introduced protocol (SMC-R) will need its own keepalive function. Therefore, add "keepalive" function pointer to "struct proto", and call it from sock_setsockopt() via this pointer. Signed-off-by: Ursula Braun Reviewed-by: Utz Bacher Signed-off-by: David S. Miller --- include/net/sock.h | 1 + net/core/sock.c | 7 ++----- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index f0e867f58722..99deda67eba0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1024,6 +1024,7 @@ struct proto { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); + void (*keepalive)(struct sock *sk, int valbool); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, diff --git a/net/core/sock.c b/net/core/sock.c index f560e0826009..5018703ee2c2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -762,11 +762,8 @@ set_rcvbuf: goto set_rcvbuf; case SO_KEEPALIVE: -#ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) - tcp_set_keepalive(sk, valbool); -#endif + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, valbool); sock_valbool_flag(sk, 
SOCK_KEEPOPEN, valbool); break; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7e4be4f361f3..56d756ecfb59 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2376,6 +2376,7 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3705075f42c3..29a9bd5f1225 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -617,6 +617,7 @@ void tcp_set_keepalive(struct sock *sk, int val) else if (!val) inet_csk_delete_keepalive_timer(sk); } +EXPORT_SYMBOL_GPL(tcp_set_keepalive); static void tcp_keepalive_timer (unsigned long data) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a4cdf6a34c30..228965dca3c5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1889,6 +1889,7 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, -- cgit v1.2.3 From f16a7dd5cf27eeda187425c9c7d96802a549f9c4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Jan 2017 16:55:26 +0100 Subject: smc: netlink interface for SMC sockets Support for SMC socket monitoring via netlink sockets of protocol NETLINK_SOCK_DIAG. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- include/net/smc.h | 20 ++++ include/net/sock.h | 3 + include/uapi/linux/netlink.h | 1 + include/uapi/linux/smc_diag.h | 85 +++++++++++++++++ net/smc/Kconfig | 9 ++ net/smc/Makefile | 1 + net/smc/af_smc.c | 43 ++++++++- net/smc/smc.h | 2 + net/smc/smc_close.c | 1 + net/smc/smc_diag.c | 215 ++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 379 insertions(+), 1 deletion(-) create mode 100644 include/net/smc.h create mode 100644 include/uapi/linux/smc_diag.h create mode 100644 net/smc/smc_diag.c (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h new file mode 100644 index 000000000000..12d26358ad9f --- /dev/null +++ b/include/net/smc.h @@ -0,0 +1,20 @@ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the SMC module (socket related) + * + * Copyright IBM Corp. 2016 + * + * Author(s): Ursula Braun + */ +#ifndef _SMC_H +#define _SMC_H + +struct smc_hashinfo { + rwlock_t lock; + struct hlist_head ht; +}; + +int smc_hash_sk(struct sock *sk); +void smc_unhash_sk(struct sock *sk); +#endif /* _SMC_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 99deda67eba0..389a0a619b45 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -70,6 +70,7 @@ #include #include #include +#include /* * This structure really needs to be cleaned up. 
@@ -986,6 +987,7 @@ struct request_sock_ops; struct timewait_sock_ops; struct inet_hashinfo; struct raw_hashinfo; +struct smc_hashinfo; struct module; /* @@ -1094,6 +1096,7 @@ struct proto { struct inet_hashinfo *hashinfo; struct udp_table *udp_table; struct raw_hashinfo *raw_hash; + struct smc_hashinfo *smc_hash; } h; struct module *owner; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 0dba4e4ed2be..f3946a27bd07 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -27,6 +27,7 @@ #define NETLINK_ECRYPTFS 19 #define NETLINK_RDMA 20 #define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ #define NETLINK_INET_DIAG NETLINK_SOCK_DIAG diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h new file mode 100644 index 000000000000..0063919fea34 --- /dev/null +++ b/include/uapi/linux/smc_diag.h @@ -0,0 +1,85 @@ +#ifndef _UAPI_SMC_DIAG_H_ +#define _UAPI_SMC_DIAG_H_ + +#include +#include +#include + +/* Request structure */ +struct smc_diag_req { + __u8 diag_family; + __u8 pad[2]; + __u8 diag_ext; /* Query extended information */ + struct inet_diag_sockid id; +}; + +/* Base info structure. 
It contains socket identity (addrs/ports/cookie) based + * on the internal clcsock, and more SMC-related socket data + */ +struct smc_diag_msg { + __u8 diag_family; + __u8 diag_state; + __u8 diag_fallback; + __u8 diag_shutdown; + struct inet_diag_sockid id; + + __u32 diag_uid; + __u64 diag_inode; +}; + +/* Extensions */ + +enum { + SMC_DIAG_NONE, + SMC_DIAG_CONNINFO, + SMC_DIAG_LGRINFO, + SMC_DIAG_SHUTDOWN, + __SMC_DIAG_MAX, +}; + +#define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) + +/* SMC_DIAG_CONNINFO */ + +struct smc_diag_cursor { + __u16 reserved; + __u16 wrap; + __u32 count; +}; + +struct smc_diag_conninfo { + __u32 token; /* unique connection id */ + __u32 sndbuf_size; /* size of send buffer */ + __u32 rmbe_size; /* size of RMB element */ + __u32 peer_rmbe_size; /* size of peer RMB element */ + /* local RMB element cursors */ + struct smc_diag_cursor rx_prod; /* received producer cursor */ + struct smc_diag_cursor rx_cons; /* received consumer cursor */ + /* peer RMB element cursors */ + struct smc_diag_cursor tx_prod; /* sent producer cursor */ + struct smc_diag_cursor tx_cons; /* sent consumer cursor */ + __u8 rx_prod_flags; /* received producer flags */ + __u8 rx_conn_state_flags; /* recvd connection flags*/ + __u8 tx_prod_flags; /* sent producer flags */ + __u8 tx_conn_state_flags; /* sent connection flags*/ + /* send buffer cursors */ + struct smc_diag_cursor tx_prep; /* prepared to be sent cursor */ + struct smc_diag_cursor tx_sent; /* sent cursor */ + struct smc_diag_cursor tx_fin; /* confirmed sent cursor */ +}; + +/* SMC_DIAG_LINKINFO */ + +struct smc_diag_linkinfo { + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ +}; + +struct smc_diag_lgrinfo { + struct smc_diag_linkinfo lnk[1]; + __u8 role; +}; +#endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 
bc029803e728..c717ef0896aa 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -9,3 +9,12 @@ config SMC a separate socket family SMC. Select this option if you want to run SMC socket applications + +config SMC_DIAG + tristate "SMC: socket monitoring interface" + depends on SMC + ---help--- + Support for SMC socket monitoring interface used by tools such as + smcss. + + if unsure, say Y. diff --git a/net/smc/Makefile b/net/smc/Makefile index 5cf0cafaa208..188104654b54 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_SMC) += smc.o +obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3f543d58bc5c..5d4208ad029e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "smc.h" #include "smc_clc.h" @@ -59,13 +60,48 @@ static void smc_set_keepalive(struct sock *sk, int val) smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); } -static struct proto smc_proto = { +static struct smc_hashinfo smc_v4_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), +}; + +int smc_hash_sk(struct sock *sk) +{ + struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; + struct hlist_head *head; + + head = &h->ht; + + write_lock_bh(&h->lock); + sk_add_node(sk, head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&h->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(smc_hash_sk); + +void smc_unhash_sk(struct sock *sk) +{ + struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; + + write_lock_bh(&h->lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&h->lock); +} +EXPORT_SYMBOL_GPL(smc_unhash_sk); + +struct proto smc_proto = { .name = "SMC", .owner = THIS_MODULE, .keepalive = smc_set_keepalive, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, .obj_size = 
sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, .slab_flags = SLAB_DESTROY_BY_RCU, }; +EXPORT_SYMBOL_GPL(smc_proto); static int smc_release(struct socket *sock) { @@ -109,6 +145,7 @@ static int smc_release(struct socket *sock) schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); } + sk->sk_prot->unhash(sk); release_sock(sk); sock_put(sk); @@ -144,6 +181,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); + sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); return sk; @@ -536,6 +574,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) lsmc->sk.sk_err = -rc; new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); + sk->sk_prot->unhash(new_sk); sock_put(new_sk); *new_smc = NULL; goto out; @@ -545,6 +584,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); + sk->sk_prot->unhash(new_sk); sock_put(new_sk); *new_smc = NULL; goto out; @@ -1320,6 +1360,7 @@ static int __init smc_init(void) pr_err("%s: sock_register fails with %d\n", __func__, rc); goto out_proto; } + INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { diff --git a/net/smc/smc.h b/net/smc/smc.h index 959a5d2014ab..ee5fbea24549 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -21,6 +21,8 @@ #define SMC_MAX_PORTS 2 /* Max # of ports */ +extern struct proto smc_proto; + #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index d70c05b57021..03dfcc6b7661 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -384,6 +384,7 @@ void smc_close_sock_put_work(struct work_struct *work) struct smc_sock, sock_put_work); + smc->sk.sk_prot->unhash(&smc->sk); sock_put(&smc->sk); } diff 
--git a/net/smc/smc_diag.c b/net/smc/smc_diag.c new file mode 100644 index 000000000000..d2d01cf70224 --- /dev/null +++ b/net/smc/smc_diag.c @@ -0,0 +1,215 @@ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Monitoring SMC transport protocol sockets + * + * Copyright IBM Corp. 2016 + * + * Author(s): Ursula Braun + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smc.h" +#include "smc_core.h" + +static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) +{ + sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", + be16_to_cpu(((__be16 *)gid_raw)[0]), + be16_to_cpu(((__be16 *)gid_raw)[1]), + be16_to_cpu(((__be16 *)gid_raw)[2]), + be16_to_cpu(((__be16 *)gid_raw)[3]), + be16_to_cpu(((__be16 *)gid_raw)[4]), + be16_to_cpu(((__be16 *)gid_raw)[5]), + be16_to_cpu(((__be16 *)gid_raw)[6]), + be16_to_cpu(((__be16 *)gid_raw)[7])); +} + +static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) +{ + struct smc_sock *smc = smc_sk(sk); + + r->diag_family = sk->sk_family; + if (!smc->clcsock) + return; + r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); + r->id.idiag_dport = smc->clcsock->sk->sk_dport; + r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; + r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; +} + +static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct smc_diag_msg *r, + struct user_namespace *user_ns) +{ + if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) + return 1; + + r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->diag_inode = sock_i_ino(sk); + return 0; +} + +static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct smc_diag_req *req, + struct 
nlattr *bc) +{ + struct smc_sock *smc = smc_sk(sk); + struct user_namespace *user_ns; + struct smc_diag_msg *r; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + smc_diag_msg_common_fill(r, sk); + r->diag_state = sk->sk_state; + r->diag_fallback = smc->use_fallback; + user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); + if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) + goto errout; + + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { + struct smc_connection *conn = &smc->conn; + struct smc_diag_conninfo cinfo = { + .token = conn->alert_token_local, + .sndbuf_size = conn->sndbuf_size, + .rmbe_size = conn->rmbe_size, + .peer_rmbe_size = conn->peer_rmbe_size, + + .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, + .rx_prod.count = conn->local_rx_ctrl.prod.count, + .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap, + .rx_cons.count = conn->local_rx_ctrl.cons.count, + + .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap, + .tx_prod.count = conn->local_tx_ctrl.prod.count, + .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap, + .tx_cons.count = conn->local_tx_ctrl.cons.count, + + .tx_prod_flags = + *(u8 *)&conn->local_tx_ctrl.prod_flags, + .tx_conn_state_flags = + *(u8 *)&conn->local_tx_ctrl.conn_state_flags, + .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags, + .rx_conn_state_flags = + *(u8 *)&conn->local_rx_ctrl.conn_state_flags, + + .tx_prep.wrap = conn->tx_curs_prep.wrap, + .tx_prep.count = conn->tx_curs_prep.count, + .tx_sent.wrap = conn->tx_curs_sent.wrap, + .tx_sent.count = conn->tx_curs_sent.count, + .tx_fin.wrap = conn->tx_curs_fin.wrap, + .tx_fin.count = conn->tx_curs_fin.count, + }; + + if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0) + goto errout; + } + + if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { + struct smc_diag_lgrinfo linfo = { + .role = 
smc->conn.lgr->role, + .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, + .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id, + }; + + memcpy(linfo.lnk[0].ibname, + smc->conn.lgr->lnk[0].smcibdev->ibdev->name, + sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); + smc_gid_be16_convert(linfo.lnk[0].gid, + smc->conn.lgr->lnk[0].gid.raw); + smc_gid_be16_convert(linfo.lnk[0].peer_gid, + smc->conn.lgr->lnk[0].peer_gid); + + if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) + goto errout; + } + + nlmsg_end(skb, nlh); + return 0; + +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *bc = NULL; + struct hlist_head *head; + struct sock *sk; + int rc = 0; + + read_lock(&smc_proto.h.smc_hash->lock); + head = &smc_proto.h.smc_hash->ht; + if (hlist_empty(head)) + goto out; + + sk_for_each(sk, head) { + if (!net_eq(sock_net(sk), net)) + continue; + rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc); + if (rc) + break; + } + +out: + read_unlock(&smc_proto.h.smc_hash->lock); + return rc; +} + +static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + struct net *net = sock_net(skb->sk); + + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { + { + struct netlink_dump_control c = { + .dump = smc_diag_dump, + .min_dump_alloc = SKB_WITH_OVERHEAD(32768), + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } + } + return 0; +} + +static const struct sock_diag_handler smc_diag_handler = { + .family = AF_SMC, + .dump = smc_diag_handler_dump, +}; + +static int __init smc_diag_init(void) +{ + return sock_diag_register(&smc_diag_handler); +} + +static void __exit smc_diag_exit(void) +{ + sock_diag_unregister(&smc_diag_handler); +} + +module_init(smc_diag_init); +module_exit(smc_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 
43 /* AF_SMC */); -- cgit v1.2.3 From af5d27c4e12b804c065c0e7c87507fea5683dab4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 9 Jan 2017 14:20:47 +0100 Subject: xfrm: remove xfrm_state_put_afinfo commit 44abdc3047aecafc141dfbaf1ed ("xfrm: replace rwlock on xfrm_state_afinfo with rcu") made xfrm_state_put_afinfo equivalent to rcu_read_unlock. Use spatch to replace it with direct calls to rcu_read_unlock: @@ struct xfrm_state_afinfo *a; @@ - xfrm_state_put_afinfo(a); + rcu_read_unlock(); old: text data bss dec hex filename 22570 72 424 23066 5a1a xfrm_state.o 1612 0 0 1612 64c xfrm_output.o new: 22554 72 424 23050 5a0a xfrm_state.o 1596 0 0 1596 63c xfrm_output.o Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/xfrm/xfrm_output.c | 8 +++----- net/xfrm/xfrm_state.c | 31 +++++++++++++------------------ 3 files changed, 16 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 31947b9c21d6..957d0cc30691 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -343,7 +343,6 @@ struct xfrm_state_afinfo { int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_input_afinfo { unsigned int family; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 637387bbaaea..8ba29fe58352 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -246,10 +246,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) return; afinfo = xfrm_state_get_afinfo(proto); - if (!afinfo) - return; - - afinfo->local_error(skb, mtu); - xfrm_state_put_afinfo(afinfo); + if (afinfo) + afinfo->local_error(skb, mtu); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 
57e9578c35e2..783084484582 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -192,7 +192,7 @@ int xfrm_register_type(const struct xfrm_type *type, unsigned short family) else err = -EEXIST; spin_unlock_bh(&xfrm_type_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_type); @@ -213,7 +213,7 @@ int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) else typemap[type->proto] = NULL; spin_unlock_bh(&xfrm_type_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -235,13 +235,13 @@ retry: if (unlikely(type && !try_module_get(type->owner))) type = NULL; if (!type && !modload_attempted) { - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); request_module("xfrm-type-%d-%d", family, proto); modload_attempted = 1; goto retry; } - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return type; } @@ -280,7 +280,7 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) out: spin_unlock_bh(&xfrm_mode_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -308,7 +308,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) } spin_unlock_bh(&xfrm_mode_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -331,13 +331,13 @@ retry: if (unlikely(mode && !try_module_get(mode->owner))) mode = NULL; if (!mode && !modload_attempted) { - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); request_module("xfrm-mode-%d-%d", family, encap); modload_attempted = 1; goto retry; } - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return mode; } @@ -651,13 +651,13 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, afinfo->init_tempsel(&x->sel, fl); if (family != tmpl->encap_family) { - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); afinfo = xfrm_state_get_afinfo(tmpl->encap_family); if 
(!afinfo) return -1; } afinfo->init_temprop(x, tmpl, daddr, saddr); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return 0; } @@ -1474,7 +1474,7 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); spin_unlock_bh(&net->xfrm.xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_tmpl_sort); @@ -1494,7 +1494,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); spin_unlock_bh(&net->xfrm.xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_state_sort); @@ -1978,11 +1978,6 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) -{ - rcu_read_unlock(); -} - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { @@ -2025,7 +2020,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) if (afinfo->init_flags) err = afinfo->init_flags(x); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); if (err) goto error; -- cgit v1.2.3 From 711059b9752ad09ae6bcd4be8e48d30e5db483d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 9 Jan 2017 14:20:48 +0100 Subject: xfrm: add and use xfrm_state_afinfo_get_rcu xfrm_init_tempstate is always called from within rcu read side section. We can thus use a simpler function that doesn't call rcu_read_lock again. While at it, also make xfrm_init_tempstate return value void, the return value was never tested. A followup patch will replace remaining callers of xfrm_state_get_afinfo with xfrm_state_afinfo_get_rcu variant and then remove the 'old' get_afinfo interface. 
Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_state.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 957d0cc30691..c52197cf51dc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -343,6 +343,7 @@ struct xfrm_state_afinfo { int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { unsigned int family; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 783084484582..b5dad899fb0e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -639,26 +639,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) } EXPORT_SYMBOL(xfrm_sad_getinfo); -static int +static void xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, const struct xfrm_tmpl *tmpl, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return -1; - afinfo->init_tempsel(&x->sel, fl); + struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family); + + if (afinfo) + afinfo->init_tempsel(&x->sel, fl); if (family != tmpl->encap_family) { - rcu_read_unlock(); - afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family); if (!afinfo) - return -1; + return; } afinfo->init_temprop(x, tmpl, daddr, saddr); - rcu_read_unlock(); - return 0; } static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, @@ -1966,6 +1963,14 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); +struct 
xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family) +{ + if (unlikely(family >= NPROTO)) + return NULL; + + return rcu_dereference(xfrm_state_afinfo[family]); +} + struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; -- cgit v1.2.3 From 55733350e5e8b70c5e54a30dbf98148c695f21f5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 11 Jan 2017 14:05:42 +0100 Subject: flow disector: ARP support Allow dissection of (R)ARP operation hardware and protocol addresses for Ethernet hardware and IPv4 protocol addresses. There are currently no users of FLOW_DISSECTOR_KEY_ARP. A follow-up patch will allow FLOW_DISSECTOR_KEY_ARP to be used by the flower classifier. Signed-off-by: Simon Horman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 19 +++++++++++++++ net/core/flow_dissector.c | 57 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d896a33e00d4..ac9703018a3a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -88,6 +88,24 @@ struct flow_dissector_key_addrs { }; }; +/** + * flow_dissector_key_arp: + * @ports: Operation, source and target addresses for an ARP header + * for Ethernet hardware addresses and IPv4 protocol addresses + * sip: Sender IP address + * tip: Target IP address + * op: Operation + * sha: Sender hardware address + * tha: Target hardware address + */ +struct flow_dissector_key_arp { + __u32 sip; + __u32 tip; + __u8 op; + unsigned char sha[ETH_ALEN]; + unsigned char tha[ETH_ALEN]; +}; + /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header @@ -141,6 +159,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct
flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fe4e1531976c..5b3800fe20f3 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -138,6 +138,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; + struct flow_dissector_key_arp *key_arp; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; @@ -379,6 +380,62 @@ mpls: nhoff += FCOE_HEADER_LEN; goto out_good; + + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): { + struct { + unsigned char ar_sha[ETH_ALEN]; + unsigned char ar_sip[4]; + unsigned char ar_tha[ETH_ALEN]; + unsigned char ar_tip[4]; + } *arp_eth, _arp_eth; + const struct arphdr *arp; + struct arphdr _arp; + + arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, + hlen, &_arp); + if (!arp) + goto out_bad; + + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_hln != ETH_ALEN || + arp->ar_pln != 4 || + (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST))) + goto out_bad; + + arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), + sizeof(_arp_eth), data, + hlen - sizeof(_arp), + &_arp_eth); + if (!arp_eth) + goto out_bad; + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ARP)) { + + key_arp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ARP, + target_container); + + memcpy(&key_arp->sip, arp_eth->ar_sip, + sizeof(key_arp->sip)); + memcpy(&key_arp->tip, arp_eth->ar_tip, + sizeof(key_arp->tip)); + + /* Only store the
lower byte of the opcode; + * this covers ARPOP_REPLY and ARPOP_REQUEST. + */ + key_arp->op = ntohs(arp->ar_op) & 0xff; + + ether_addr_copy(key_arp->sha, arp_eth->ar_sha); + ether_addr_copy(key_arp->tha, arp_eth->ar_tha); + } + + goto out_good; + } + default: goto out_bad; } -- cgit v1.2.3 From 93be2b74279c15c2844684b1a027fdc71dd5d9bf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:35:25 +0100 Subject: wext: handle NULL extra data in iwe_stream_add_point better gcc-7 complains that wl3501_cs passes NULL into a function that then uses the argument as the input for memcpy: drivers/net/wireless/wl3501_cs.c: In function 'wl3501_get_scan': include/net/iw_handler.h:559:3: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy(stream + point_len, extra, iwe->u.data.length); This works fine here because iwe->u.data.length is guaranteed to be 0 and the memcpy doesn't actually have an effect. Making the length check explicit avoids the warning and should have no other effect here. Also check the pointer itself, since otherwise we get warnings elsewhere in the code. 
Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index e0f4109e64c6..c2aa73e5e6bb 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -556,7 +556,8 @@ iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, memcpy(stream + lcp_len, ((char *) &iwe->u) + IW_EV_POINT_OFF, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - memcpy(stream + point_len, extra, iwe->u.data.length); + if (iwe->u.data.length && extra) + memcpy(stream + point_len, extra, iwe->u.data.length); stream += event_len; } return stream; -- cgit v1.2.3 From cef0acd4d7d4811d2d19cd0195031bf0dfe41249 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Mon, 21 Nov 2016 16:58:40 +0200 Subject: mac80211: Add RX flag to indicate ICV stripped Add a flag that indicates that the WEP ICV was stripped from an RX packet, allowing the device to not transfer that if it's already checked. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 ++++- net/mac80211/wep.c | 3 ++- net/mac80211/wpa.c | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5f5cb194cd78..86967b85dfd0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1017,7 +1017,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. 
* @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this @@ -1088,6 +1088,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must + * be done in the hardware. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1123,6 +1125,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), RX_FLAG_MIC_STRIPPED = BIT_ULL(32), RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), + RX_FLAG_ICV_STRIPPED = BIT_ULL(34), }; #define RX_FLAG_STBC_SHIFT 26 diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index efa3f48f1ec5..73e8f347802e 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) + if (!(status->flag & RX_FLAG_ICV_STRIPPED) && + pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8af6dd388d11..c1ef22df865f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -294,7 +294,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); + if (!(status->flag & RX_FLAG_ICV_STRIPPED)) + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); -- cgit v1.2.3 From 10b2eb6949ece992a1dd58edb28e01f05e5bf004 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Jan 2017 09:31:32 +0100 Subject: wext: uninline stream addition functions With 78, 111 and 85 bytes respectively (on x86-64), the functions 
iwe_stream_add_event(), iwe_stream_add_point() and iwe_stream_add_value() really shouldn't be inlines. It appears that at least my compiler already decided the same, and created a single instance of each one of them for each file using it, but that's still a number of instances in the system overall, which this reduces. Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 67 +++++------------------------------------------- net/wireless/wext-core.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 60 deletions(-) (limited to 'include/net') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index c2aa73e5e6bb..2509728650bd 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -505,25 +505,8 @@ static inline int iwe_stream_event_len_adjust(struct iw_request_info *info, /* * Wrapper to add an Wireless Event to a stream of events. */ -static inline char * -iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends, - struct iw_event *iwe, int event_len) -{ - int lcp_len = iwe_stream_lcp_len(info); - - event_len = iwe_stream_event_len_adjust(info, event_len); - - /* Check if it's possible */ - if(likely((stream + event_len) < ends)) { - iwe->len = event_len; - /* Beware of alignement issues on 64 bits */ - memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); - memcpy(stream + lcp_len, &iwe->u, - event_len - lcp_len); - stream += event_len; - } - return stream; -} +char *iwe_stream_add_event(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, int event_len); static inline char * iwe_stream_add_event_check(struct iw_request_info *info, char *stream, @@ -541,27 +524,8 @@ iwe_stream_add_event_check(struct iw_request_info *info, char *stream, * Wrapper to add an short Wireless Event containing a pointer to a * stream of events. 
*/ -static inline char * -iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, - struct iw_event *iwe, char *extra) -{ - int event_len = iwe_stream_point_len(info) + iwe->u.data.length; - int point_len = iwe_stream_point_len(info); - int lcp_len = iwe_stream_lcp_len(info); - - /* Check if it's possible */ - if(likely((stream + event_len) < ends)) { - iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); - memcpy(stream + lcp_len, - ((char *) &iwe->u) + IW_EV_POINT_OFF, - IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - if (iwe->u.data.length && extra) - memcpy(stream + point_len, extra, iwe->u.data.length); - stream += event_len; - } - return stream; -} +char *iwe_stream_add_point(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, char *extra); static inline char * iwe_stream_add_point_check(struct iw_request_info *info, char *stream, @@ -580,25 +544,8 @@ iwe_stream_add_point_check(struct iw_request_info *info, char *stream, * Be careful, this one is tricky to use properly : * At the first run, you need to have (value = event + IW_EV_LCP_LEN). 
*/ -static inline char * -iwe_stream_add_value(struct iw_request_info *info, char *event, char *value, - char *ends, struct iw_event *iwe, int event_len) -{ - int lcp_len = iwe_stream_lcp_len(info); - - /* Don't duplicate LCP */ - event_len -= IW_EV_LCP_LEN; - - /* Check if it's possible */ - if(likely((value + event_len) < ends)) { - /* Add new value */ - memcpy(value, &iwe->u, event_len); - value += event_len; - /* Patch LCP */ - iwe->len = value - event; - memcpy(event, (char *) iwe, lcp_len); - } - return value; -} +char *iwe_stream_add_value(struct iw_request_info *info, char *event, + char *value, char *ends, struct iw_event *iwe, + int event_len); #endif /* _IW_HANDLER_H */ diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6250b1cfcde5..1a4db6790e20 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1119,3 +1119,70 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, return ret; } #endif + +char *iwe_stream_add_event(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, int event_len) +{ + int lcp_len = iwe_stream_lcp_len(info); + + event_len = iwe_stream_event_len_adjust(info, event_len); + + /* Check if it's possible */ + if (likely((stream + event_len) < ends)) { + iwe->len = event_len; + /* Beware of alignement issues on 64 bits */ + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); + memcpy(stream + lcp_len, &iwe->u, + event_len - lcp_len); + stream += event_len; + } + + return stream; +} +EXPORT_SYMBOL(iwe_stream_add_event); + +char *iwe_stream_add_point(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, char *extra) +{ + int event_len = iwe_stream_point_len(info) + iwe->u.data.length; + int point_len = iwe_stream_point_len(info); + int lcp_len = iwe_stream_lcp_len(info); + + /* Check if it's possible */ + if (likely((stream + event_len) < ends)) { + iwe->len = event_len; + memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); + memcpy(stream + 
lcp_len, + ((char *) &iwe->u) + IW_EV_POINT_OFF, + IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); + if (iwe->u.data.length && extra) + memcpy(stream + point_len, extra, iwe->u.data.length); + stream += event_len; + } + + return stream; +} +EXPORT_SYMBOL(iwe_stream_add_point); + +char *iwe_stream_add_value(struct iw_request_info *info, char *event, + char *value, char *ends, struct iw_event *iwe, + int event_len) +{ + int lcp_len = iwe_stream_lcp_len(info); + + /* Don't duplicate LCP */ + event_len -= IW_EV_LCP_LEN; + + /* Check if it's possible */ + if (likely((value + event_len) < ends)) { + /* Add new value */ + memcpy(value, &iwe->u, event_len); + value += event_len; + /* Patch LCP */ + iwe->len = value - event; + memcpy(event, (char *) iwe, lcp_len); + } + + return value; +} +EXPORT_SYMBOL(iwe_stream_add_value); -- cgit v1.2.3 From bf95ecdba93b98d27ac219e79f773f2074b4ca47 Mon Sep 17 00:00:00 2001 From: vamsi krishna Date: Fri, 13 Jan 2017 01:12:20 +0200 Subject: cfg80211: Add support to sched scan to report better BSSs Enhance sched scan to support option of finding a better BSS while in connected state. Firmware scans the medium and reports when it finds a known BSS which has better RSSI than the current connected BSS. New attributes to specify the relative RSSI (compared to the current BSS) are added to the sched scan to implement this. 
Signed-off-by: vamsi krishna Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 36 +++++++++++++++++++++++++----------- include/uapi/linux/nl80211.h | 30 ++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cb13789ebaef..4456491132cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1619,6 +1619,17 @@ struct cfg80211_sched_scan_plan { u32 iterations; }; +/** + * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. + * + * @band: band of BSS which should match for RSSI level adjustment. + * @delta: value of RSSI level adjustment. + */ +struct cfg80211_bss_select_adjust { + enum nl80211_band band; + s8 delta; +}; + /** * struct cfg80211_sched_scan_request - scheduled scan request description * @@ -1654,6 +1665,16 @@ struct cfg80211_sched_scan_plan { * cycle. The driver may ignore this parameter and start * immediately (or at any other time), if this feature is not * supported. + * @relative_rssi_set: Indicates whether @relative_rssi is set or not. + * @relative_rssi: Relative RSSI threshold in dB to restrict scan result + * reporting in connected state to cases where a matching BSS is determined + * to have better or slightly worse RSSI than the current connected BSS. + * The relative RSSI threshold values are ignored in disconnected state. + * @rssi_adjust: delta dB of RSSI preference to be given to the BSSs that belong + * to the specified band while deciding whether a better BSS is reported + * using @relative_rssi. If delta is a negative number, the BSSs that + * belong to the specified band will be penalized by delta dB in relative + * comparisons. 
*/ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1673,6 +1694,10 @@ struct cfg80211_sched_scan_request { u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); + bool relative_rssi_set; + s8 relative_rssi; + struct cfg80211_bss_select_adjust rssi_adjust; + /* internal */ struct wiphy *wiphy; struct net_device *dev; @@ -1980,17 +2005,6 @@ struct cfg80211_ibss_params { struct ieee80211_ht_cap ht_capa_mask; }; -/** - * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. - * - * @band: band of BSS which should match for RSSI level adjustment. - * @delta: value of RSSI level adjustment. - */ -struct cfg80211_bss_select_adjust { - enum nl80211_band band; - s8 delta; -}; - /** * struct cfg80211_bss_selection - connection parameters for BSS selection. * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 908886c83894..6b17feb5e839 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1982,6 +1982,20 @@ enum nl80211_commands { * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also * used in various commands/events for specifying the BSSID. * + * @NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI: Relative RSSI threshold by which + * other BSSs have to be better or slightly worse than the current + * connected BSS so that they get reported to user space. + * This will give an opportunity to userspace to consider connecting to + * other matching BSSs which have better or slightly worse RSSI than + * the current connected BSS by using an offloaded operation to avoid + * unnecessary wakeups. + * + * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in + * the specified band is to be adjusted before doing + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out + * better BSSs. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2388,6 +2402,9 @@ enum nl80211_attrs { NL80211_ATTR_BSSID, + NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, + NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3080,6 +3097,13 @@ enum nl80211_reg_rule_attr { * how this API was implemented in the past. Also, due to the same problem, * the only way to create a matchset with only an RSSI filter (with this * attribute) is if there's only a single matchset with the RSSI attribute. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI: Flag indicating whether + * %NL80211_SCHED_SCAN_MATCH_ATTR_RSSI to be used as absolute RSSI or + * relative to current bss's RSSI. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST: When present the RSSI level for + * BSS-es in the specified band is to be adjusted before doing + * RSSI-based BSS selection. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -3089,6 +3113,8 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_SSID, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, @@ -4703,6 +4729,9 @@ enum nl80211_feature_flags { * in @NL80211_CMD_FRAME while not associated. * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED: This driver supports * randomized TA in @NL80211_CMD_FRAME while associated. 
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan + * for reporting BSSs with better RSSI than the current connected BSS + * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI). * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4720,6 +4749,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_FILS_STA, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, + NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b378d0a04003..71c66ff9a702 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -405,6 +405,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, [NL80211_ATTR_BSSID] = { .len = ETH_ALEN }, + [NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 }, + [NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = { + .len = sizeof(struct nl80211_bss_select_rssi_adjust) + }, }; /* policy for the key attributes */ @@ -6950,6 +6954,12 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (!n_plans || n_plans > wiphy->max_sched_scan_plans) return ERR_PTR(-EINVAL); + if (!wiphy_ext_feature_isset( + wiphy, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI) && + (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] || + attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST])) + return ERR_PTR(-EINVAL); + request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->match_sets) * n_match_sets @@ -7156,6 +7166,26 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->delay = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); + if (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]) { + request->relative_rssi 
= nla_get_s8( + attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]); + request->relative_rssi_set = true; + } + + if (request->relative_rssi_set && + attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]) { + struct nl80211_bss_select_rssi_adjust *rssi_adjust; + + rssi_adjust = nla_data( + attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]); + request->rssi_adjust.band = rssi_adjust->band; + request->rssi_adjust.delta = rssi_adjust->delta; + if (!is_band_valid(wiphy, request->rssi_adjust.band)) { + err = -EINVAL; + goto out_free; + } + } + err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs); if (err) goto out_free; @@ -9692,6 +9722,20 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) return -ENOBUFS; + if (req->relative_rssi_set) { + struct nl80211_bss_select_rssi_adjust rssi_adjust; + + if (nla_put_s8(msg, NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, + req->relative_rssi)) + return -ENOBUFS; + + rssi_adjust.band = req->rssi_adjust.band; + rssi_adjust.delta = req->rssi_adjust.delta; + if (nla_put(msg, NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, + sizeof(rssi_adjust), &rssi_adjust)) + return -ENOBUFS; + } + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; -- cgit v1.2.3 From 3093ebbeabcdddc9a982950052f2151df43c7aa2 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Fri, 13 Jan 2017 01:12:21 +0200 Subject: cfg80211: Specify the reason for connect timeout This enhances the connect timeout API to also carry the reason for the timeout. These reason codes for the connect time out are represented by enum nl80211_timeout_reason and are passed to user space through a new attribute NL80211_ATTR_TIMEOUT_REASON (u32). 
Signed-off-by: Purushottam Kushwaha Signed-off-by: Jouni Malinen [keep gfp_t argument last] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 18 ++++++++++++++---- include/uapi/linux/nl80211.h | 21 +++++++++++++++++++++ net/wireless/core.h | 4 +++- net/wireless/mlme.c | 3 ++- net/wireless/nl80211.c | 9 +++++++-- net/wireless/nl80211.h | 4 +++- net/wireless/sme.c | 39 +++++++++++++++++++++++++++------------ net/wireless/util.c | 2 +- 8 files changed, 78 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4456491132cd..9b3427c8d1db 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5090,6 +5090,12 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. * @gfp: allocation flags + * @timeout_reason: reason for connection timeout. This is used when the + * connection fails due to a timeout instead of an explicit rejection from + * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is + * not known. This value is used only if @status < 0 to indicate that the + * failure is due to a timeout and not due to explicit rejection by the AP. + * This value is ignored in other cases (@status >= 0). * * It should be called by the underlying driver whenever connect() has * succeeded. 
This is similar to cfg80211_connect_result(), but with the @@ -5099,7 +5105,8 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, int status, gfp_t gfp); + size_t resp_ie_len, int status, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason); /** * cfg80211_connect_result - notify cfg80211 of connection result @@ -5125,7 +5132,8 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, u16 status, gfp_t gfp) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, resp_ie, - resp_ie_len, status, gfp); + resp_ie_len, status, gfp, + NL80211_TIMEOUT_UNSPECIFIED); } /** @@ -5136,6 +5144,7 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @gfp: allocation flags + * @timeout_reason: reason for connection timeout. * * It should be called by the underlying driver whenever connect() has failed * in a sequence where no explicit authentication/association rejection was @@ -5145,10 +5154,11 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, */ static inline void cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, gfp_t gfp) + const u8 *req_ie, size_t req_ie_len, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, - gfp); + gfp, timeout_reason); } /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6b17feb5e839..c51b40cc0645 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1996,6 +1996,10 @@ enum nl80211_commands { * better BSSs. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. 
* + * @NL80211_ATTR_TIMEOUT_REASON: The reason for which an operation timed out. + * u32 attribute with an &enum nl80211_timeout_reason value. This is used, + * e.g., with %NL80211_CMD_CONNECT event. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2405,6 +2409,8 @@ enum nl80211_attrs { NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, + NL80211_ATTR_TIMEOUT_REASON, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4788,6 +4794,21 @@ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_BLOCKED_CLIENT, }; +/** + * enum nl80211_timeout_reason - timeout reasons + * + * @NL80211_TIMEOUT_UNSPECIFIED: Timeout reason unspecified. + * @NL80211_TIMEOUT_SCAN: Scan (AP discovery) timed out. + * @NL80211_TIMEOUT_AUTH: Authentication timed out. + * @NL80211_TIMEOUT_ASSOC: Association timed out. + */ +enum nl80211_timeout_reason { + NL80211_TIMEOUT_UNSPECIFIED, + NL80211_TIMEOUT_SCAN, + NL80211_TIMEOUT_AUTH, + NL80211_TIMEOUT_ASSOC, +}; + /** * enum nl80211_scan_flags - scan request control flags * diff --git a/net/wireless/core.h b/net/wireless/core.h index ba42055a036d..58ca206982fe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -228,6 +228,7 @@ struct cfg80211_event { size_t resp_ie_len; struct cfg80211_bss *bss; int status; /* -1 = failed; 0..65535 = status code */ + enum nl80211_timeout_reason timeout_reason; } cr; struct { const u8 *req_ie; @@ -388,7 +389,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, int status, bool wextev, - struct cfg80211_bss *bss); + struct cfg80211_bss *bss, + enum nl80211_timeout_reason timeout_reason); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); int 
cfg80211_disconnect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index b876f40c9dad..22b3d9990065 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -48,7 +48,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, /* update current_bss etc., consumes the bss reference */ __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, - status_code == WLAN_STATUS_SUCCESS, bss); + status_code == WLAN_STATUS_SUCCESS, bss, + NL80211_TIMEOUT_UNSPECIFIED); } EXPORT_SYMBOL(cfg80211_rx_assoc_resp); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 71c66ff9a702..b4e7bdd673e0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -409,6 +409,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = { .len = sizeof(struct nl80211_bss_select_rssi_adjust) }, + [NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -13231,7 +13232,9 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, - int status, gfp_t gfp) + int status, + enum nl80211_timeout_reason timeout_reason, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -13252,7 +13255,9 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, status < 0 ? 
WLAN_STATUS_UNSPECIFIED_FAILURE : status) || - (status < 0 && nla_put_flag(msg, NL80211_ATTR_TIMED_OUT)) || + (status < 0 && + (nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) || + nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, timeout_reason))) || (req_ie && nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) || (resp_ie && diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 75f82520211d..e488dca87423 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -56,7 +56,9 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, - int status, gfp_t gfp); + int status, + enum nl80211_timeout_reason timeout_reason, + gfp_t gfp); void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 46693913fcea..b347e63d7aaa 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,10 +34,11 @@ struct cfg80211_conn { CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, - CFG80211_CONN_AUTH_FAILED, + CFG80211_CONN_AUTH_FAILED_TIMEOUT, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, + CFG80211_CONN_ASSOC_FAILED_TIMEOUT, CFG80211_CONN_DEAUTH, CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, @@ -140,7 +141,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) return err; } -static int cfg80211_conn_do_work(struct wireless_dev *wdev) +static int cfg80211_conn_do_work(struct wireless_dev *wdev, + enum nl80211_timeout_reason *treason) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_connect_params *params; @@ -171,7 +173,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) NULL, 0, params->key, params->key_len, params->key_idx, NULL, 0); - case 
CFG80211_CONN_AUTH_FAILED: + case CFG80211_CONN_AUTH_FAILED_TIMEOUT: + *treason = NL80211_TIMEOUT_AUTH; return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: if (WARN_ON(!rdev->ops->assoc)) @@ -198,6 +201,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; + case CFG80211_CONN_ASSOC_FAILED_TIMEOUT: + *treason = NL80211_TIMEOUT_ASSOC; + /* fall through */ case CFG80211_CONN_ASSOC_FAILED: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, @@ -223,6 +229,7 @@ void cfg80211_conn_work(struct work_struct *work) container_of(work, struct cfg80211_registered_device, conn_work); struct wireless_dev *wdev; u8 bssid_buf[ETH_ALEN], *bssid = NULL; + enum nl80211_timeout_reason treason; rtnl_lock(); @@ -244,10 +251,12 @@ void cfg80211_conn_work(struct work_struct *work) memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; } - if (cfg80211_conn_do_work(wdev)) { + treason = NL80211_TIMEOUT_UNSPECIFIED; + if (cfg80211_conn_do_work(wdev, &treason)) { __cfg80211_connect_result( wdev->netdev, bssid, - NULL, 0, NULL, 0, -1, false, NULL); + NULL, 0, NULL, 0, -1, false, NULL, + treason); } wdev_unlock(wdev); } @@ -352,7 +361,8 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) } else if (status_code != WLAN_STATUS_SUCCESS) { __cfg80211_connect_result(wdev->netdev, mgmt->bssid, NULL, 0, NULL, 0, - status_code, false, NULL); + status_code, false, NULL, + NL80211_TIMEOUT_UNSPECIFIED); } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); @@ -400,7 +410,7 @@ void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) if (!wdev->conn) return; - wdev->conn->state = CFG80211_CONN_AUTH_FAILED; + wdev->conn->state = CFG80211_CONN_AUTH_FAILED_TIMEOUT; schedule_work(&rdev->conn_work); } @@ -422,7 +432,7 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) if (!wdev->conn) 
return; - wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED_TIMEOUT; schedule_work(&rdev->conn_work); } @@ -564,7 +574,9 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, /* we're good if we have a matching bss struct */ if (bss) { - err = cfg80211_conn_do_work(wdev); + enum nl80211_timeout_reason treason; + + err = cfg80211_conn_do_work(wdev, &treason); cfg80211_put_bss(wdev->wiphy, bss); } else { /* otherwise we'll need to scan for the AP first */ @@ -661,7 +673,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, int status, bool wextev, - struct cfg80211_bss *bss) + struct cfg80211_bss *bss, + enum nl80211_timeout_reason timeout_reason) { struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *country_ie; @@ -680,7 +693,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, - status, GFP_KERNEL); + status, timeout_reason, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT if (wextev) { @@ -771,7 +784,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, int status, gfp_t gfp) + size_t resp_ie_len, int status, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -811,6 +825,7 @@ void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); ev->cr.bss = bss; ev->cr.status = status; + ev->cr.timeout_reason = timeout_reason; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); diff --git 
a/net/wireless/util.c b/net/wireless/util.c index cd8a7ae55e7d..1b9296882dcd 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -951,7 +951,7 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->cr.resp_ie, ev->cr.resp_ie_len, ev->cr.status, ev->cr.status == WLAN_STATUS_SUCCESS, - ev->cr.bss); + ev->cr.bss, ev->cr.timeout_reason); break; case EVENT_ROAMED: __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie, -- cgit v1.2.3 From c88215d7050f065afaed33e9599c2ef4e5e6ee22 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 13 Jan 2017 01:12:22 +0200 Subject: cfg80211: Fix documentation for connect result The function documentation for cfg80211_connect_bss() and cfg80211_connect_result() was still claiming that they are used only for a success case while these functions can now be used to report both success and various failure cases. The actual use cases were already described in the connect() documentation. Update the function specific comments to note the failure cases and also describe how the special status == -1 case is used in cfg80211_connect_bss() to indicate a connection timeout based on the internal implementation in cfg80211_connect_timeout(). Signed-off-by: Jouni Malinen [use tabs for indentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9b3427c8d1db..b7aba6e1a586 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5086,9 +5086,14 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @status: status code, 0 for successful connection, use - * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you - * the real status code for failures. 
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. If this call is used to report a + * failure due to a timeout (e.g., not receiving an Authentication frame + * from the AP) instead of an explicit rejection by the AP, -1 is used to + * indicate that this is a failure, but without a status code. + * @timeout_reason is used to report the reason for the timeout in that + * case. * @gfp: allocation flags * @timeout_reason: reason for connection timeout. This is used when the * connection fails due to a timeout instead of an explicit rejection from @@ -5097,10 +5102,10 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * failure is due to a timeout and not due to explicit rejection by the AP. * This value is ignored in other cases (@status >= 0). * - * It should be called by the underlying driver whenever connect() has - * succeeded. This is similar to cfg80211_connect_result(), but with the - * option of identifying the exact bss entry for the connection. Only one of - * these functions should be called. + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_result(), but with the option of identifying the exact bss + * entry for the connection. Only one of these functions should be called. 
*/ void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, @@ -5117,13 +5122,15 @@ void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @status: status code, 0 for successful connection, use + * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. * @gfp: allocation flags * - * It should be called by the underlying driver whenever connect() has - * succeeded. + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_bss() which allows the exact bss entry to be specified. Only + * one of these functions should be called. */ static inline void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, -- cgit v1.2.3 From e636f8b0104d6622aaaed6aa5ef17dfbf165bc51 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:31 -0800 Subject: tcp: new helper for RACK to detect loss Create a new helper tcp_rack_detect_loss to prepare the upcoming RACK reordering timer patch. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 3 +-- net/ipv4/tcp_input.c | 12 ++++++++---- net/ipv4/tcp_recovery.c | 22 +++++++++++++--------- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1da0aa724929..51183bba3835 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,8 +1863,7 @@ extern int sysctl_tcp_recovery; /* Use TCP RACK to detect (some) tail and retransmit losses */ #define TCP_RACK_LOST_RETRANS 0x1 -extern int tcp_rack_mark_lost(struct sock *sk); - +extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, const struct skb_mstamp *xmit_time, u8 sacked); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ec6d84363024..bb24b93e64bc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2865,10 +2865,14 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && - tcp_rack_mark_lost(sk)) { - flag |= FLAG_LOST_RETRANS; - *ack_flag |= FLAG_LOST_RETRANS; + if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) { + u32 prior_retrans = tp->retrans_out; + + tcp_rack_mark_lost(sk); + if (prior_retrans > tp->retrans_out) { + flag |= FLAG_LOST_RETRANS; + *ack_flag |= FLAG_LOST_RETRANS; + } } /* E. Process state. */ diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index f38dba5aed7a..7ea0377229c0 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -32,17 +32,11 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) * The current version is only used after recovery starts but can be * easily extended to detect the first loss. 
*/ -int tcp_rack_mark_lost(struct sock *sk) +static void tcp_rack_detect_loss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - u32 reo_wnd, prior_retrans = tp->retrans_out; - - if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) - return 0; - - /* Reset the advanced flag to avoid unnecessary queue scanning */ - tp->rack.advanced = 0; + u32 reo_wnd; /* To be more reordering resilient, allow min_rtt/4 settling delay * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed @@ -82,7 +76,17 @@ int tcp_rack_mark_lost(struct sock *sk) break; } } - return prior_retrans - tp->retrans_out; +} + +void tcp_rack_mark_lost(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) + return; + /* Reset the advanced flag to avoid unnecessary queue scanning */ + tp->rack.advanced = 0; + tcp_rack_detect_loss(sk); } /* Record the most recently (re)sent time among the (s)acked packets */ -- cgit v1.2.3 From deed7be78f512d003c6290da0a781479b31b3d74 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:32 -0800 Subject: tcp: record most recent RTT in RACK loss detection Record the most recent RTT in RACK. It is often identical to the "ca_rtt_us" values in tcp_clean_rtx_queue. But when the packet has been retransmitted, RACK chooses to believe the ACK is for the (latest) retransmitted packet if the RTT is over minimum RTT. This requires passing the arrival time of the most recent ACK to RACK routines. The timestamp is now recorded in the "ack_time" in tcp_sacktag_state during the ACK processing. This patch does not change the RACK algorithm itself. It only adds the RTT variable to prepare the next main patch. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 7 ++++--- net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++-------------- net/ipv4/tcp_recovery.c | 41 +++++++++++++++++++++++------------------ 4 files changed, 50 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fc5848dad7a4..1255c592719c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -207,6 +207,7 @@ struct tcp_sock { /* Information of the most recently (s)acked skb */ struct tcp_rack { struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u32 rtt_us; /* Associated RTT */ u8 advanced; /* mstamp advanced since last lost marking */ u8 reord; /* reordering detected */ } rack; diff --git a/include/net/tcp.h b/include/net/tcp.h index 51183bba3835..1439107658c2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,9 +1863,10 @@ extern int sysctl_tcp_recovery; /* Use TCP RACK to detect (some) tail and retransmit losses */ #define TCP_RACK_LOST_RETRANS 0x1 -extern void tcp_rack_mark_lost(struct sock *sk); -extern void tcp_rack_advance(struct tcp_sock *tp, - const struct skb_mstamp *xmit_time, u8 sacked); +extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); +extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, + const struct skb_mstamp *xmit_time, + const struct skb_mstamp *ack_time); /* * Save and compile IPv4 options, return a pointer to it diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bb24b93e64bc..8ccd171999bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1135,6 +1135,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */ struct rate_sample *rate; int flag; }; @@ -1217,7 +1218,7 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { - tcp_rack_advance(tp, xmit_time, sacked); + 
tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time); if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, @@ -2813,7 +2814,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - bool is_dupack, int *ack_flag, int *rexmit) + bool is_dupack, int *ack_flag, int *rexmit, + const struct skb_mstamp *ack_time) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2868,7 +2870,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) { u32 prior_retrans = tp->retrans_out; - tcp_rack_mark_lost(sk); + tcp_rack_mark_lost(sk, ack_time); if (prior_retrans > tp->retrans_out) { flag |= FLAG_LOST_RETRANS; *ack_flag |= FLAG_LOST_RETRANS; @@ -3105,11 +3107,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack, - struct skb_mstamp *now) + struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); struct skb_mstamp first_ackt, last_ackt; + struct skb_mstamp *now = &sack->ack_time; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3169,7 +3171,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } else if (tcp_is_sack(tp)) { tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + tcp_rack_advance(tp, sacked, + &skb->skb_mstamp, + &sack->ack_time); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3599,7 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to 
recover losses */ - struct skb_mstamp now; sack_state.first_sackt.v64 = 0; sack_state.rate = &rs; @@ -3625,7 +3628,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - skb_mstamp_get(&now); + skb_mstamp_get(&sack_state.ack_time); if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) @@ -3693,11 +3696,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state, &now); + &sack_state); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -3712,15 +3716,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag, &rs); + tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time, + sack_state.rate); + tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
@@ -3741,9 +3747,11 @@ old_ack: * If data was DSACKed, see if we can undo a cwnd reduction. */ if (TCP_SKB_CB(skb)->sacked) { + skb_mstamp_get(&sack_state.ack_time); flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, + &sack_state.ack_time); tcp_xmit_recovery(sk, rexmit); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 7ea0377229c0..557363cde58a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -32,7 +32,7 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) * The current version is only used after recovery starts but can be * easily extended to detect the first loss. */ -static void tcp_rack_detect_loss(struct sock *sk) +static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -62,13 +62,14 @@ static void tcp_rack_detect_loss(struct sock *sk) continue; if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { - - if (skb_mstamp_us_delta(&tp->rack.mstamp, - &skb->skb_mstamp) <= reo_wnd) - continue; - - /* skb is lost if packet sent later is sacked */ - tcp_rack_mark_skb_lost(sk, skb); + /* Step 3 in draft-cheng-tcpm-rack-00.txt: + * A packet is lost if its elapsed time is beyond + * the recent RTT plus the reordering window. 
+ */ + if (skb_mstamp_us_delta(now, &skb->skb_mstamp) > + tp->rack.rtt_us + reo_wnd) { + tcp_rack_mark_skb_lost(sk, skb); + } } else if (!(scb->sacked & TCPCB_RETRANS)) { /* Original data are sent sequentially so stop early * b/c the rest are all sent after rack_sent @@ -78,7 +79,7 @@ static void tcp_rack_detect_loss(struct sock *sk) } } -void tcp_rack_mark_lost(struct sock *sk) +void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) { struct tcp_sock *tp = tcp_sk(sk); @@ -86,20 +87,25 @@ void tcp_rack_mark_lost(struct sock *sk) return; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; - tcp_rack_detect_loss(sk); + tcp_rack_detect_loss(sk, now); } -/* Record the most recently (re)sent time among the (s)acked packets */ -void tcp_rack_advance(struct tcp_sock *tp, - const struct skb_mstamp *xmit_time, u8 sacked) +/* Record the most recently (re)sent time among the (s)acked packets + * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from + * draft-cheng-tcpm-rack-00.txt + */ +void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, + const struct skb_mstamp *xmit_time, + const struct skb_mstamp *ack_time) { + u32 rtt_us; + if (tp->rack.mstamp.v64 && !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) return; + rtt_us = skb_mstamp_us_delta(ack_time, xmit_time); if (sacked & TCPCB_RETRANS) { - struct skb_mstamp now; - /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior * retransmission) was sacked. @@ -110,11 +116,10 @@ void tcp_rack_advance(struct tcp_sock *tp, * so it's at least one RTT (i.e., retransmission is at least * an RTT later). 
*/ - skb_mstamp_get(&now); - if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp)) + if (rtt_us < tcp_min_rtt(tp)) return; } - + tp->rack.rtt_us = rtt_us; tp->rack.mstamp = *xmit_time; tp->rack.advanced = 1; } -- cgit v1.2.3 From 57dde7f70de34d4251f291c9eac7ad920aaf56b2 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:33 -0800 Subject: tcp: add reordering timer in RACK loss detection This patch makes RACK install a reordering timer when it suspects some packets might be lost, but wants to delay the decision a little bit to accommodate reordering. It does not create a new timer but instead repurposes the existing RTO timer, because both are meant to retransmit packets. Specifically it arms a timer ICSK_TIME_REO_TIMEOUT when the RACK timing check fails. The wait time is set to RACK.RTT + RACK.reo_wnd - (NOW - Packet.xmit_time) + fudge This translates to expecting a packet (Packet) should take (RACK.RTT + RACK.reo_wnd + fudge) to deliver after it was sent. When there are multiple packets that need a timer, we use one timer with the maximum timeout. Therefore the timer conservatively uses the maximum window to expire N packets by one timeout, instead of N timeouts to expire N packets sent at different times. The fudge factor is 2 jiffies to ensure when the timer fires, all the suspected packets would exceed the deadline and be marked lost by tcp_rack_detect_loss(). It has to be at least 1 jiffy because the clock may tick between calling icsk_reset_xmit_timer(timeout) and actually hanging the timer. The next jiffy is to lower-bound the timeout to 2 jiffies when reo_wnd is < 1ms. When the reordering timer fires (tcp_rack_reo_timeout): If we aren't in Recovery we'll enter fast recovery and force fast retransmit. This is very similar to the early retransmit (RFC5827) except RACK is not constrained to only enter recovery for small outstanding flights.
Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++- include/net/tcp.h | 4 +++ net/ipv4/inet_diag.c | 1 + net/ipv4/tcp_input.c | 6 ++-- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 +- net/ipv4/tcp_recovery.c | 57 +++++++++++++++++++++++++++++++++----- net/ipv4/tcp_timer.c | 3 ++ net/ipv6/tcp_ipv6.c | 1 + 9 files changed, 68 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 85ee3879499e..84b2edde09b1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -144,6 +144,7 @@ struct inet_connection_sock { #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ #define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ +#define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -234,7 +235,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || + what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/tcp.h b/include/net/tcp.h index 1439107658c2..64fcdeb3358b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -143,6 +143,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. 
*/ +#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ @@ -397,6 +398,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); @@ -541,6 +543,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); +void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); @@ -1867,6 +1870,7 @@ extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, const struct skb_mstamp *xmit_time, const struct skb_mstamp *ack_time); +extern void tcp_rack_reo_timeout(struct sock *sk); /* * Save and compile IPv4 options, return a pointer to it diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4dea33e5f295..d216e40623d3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -216,6 +216,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ccd171999bf..be1191829963 100644 --- a/net/ipv4/tcp_input.c +++ 
b/net/ipv4/tcp_input.c @@ -2522,8 +2522,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tcp_ecn_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, - int flag) +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; @@ -2691,7 +2690,7 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); -static void tcp_enter_recovery(struct sock *sk, bool ece_ack) +void tcp_enter_recovery(struct sock *sk, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); int mib_idx; @@ -3031,6 +3030,7 @@ void tcp_rearm_rto(struct sock *sk) u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 56d756ecfb59..ebf3e0c4967a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2230,6 +2230,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1d5331a1b1dc..0ba9026cb70d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2960,7 +2960,8 @@ begin_fwd: if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); - if (skb == tcp_write_queue_head(sk)) + if (skb == tcp_write_queue_head(sk) && + icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c 
index 557363cde58a..eb39b1b6d1dc 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -32,19 +32,18 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) * The current version is only used after recovery starts but can be * easily extended to detect the first loss. */ -static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now) +static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, + u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; u32 reo_wnd; + *reo_timeout = 0; /* To be more reordering resilient, allow min_rtt/4 settling delay * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed * RTT because reordering is often a path property and less related * to queuing or delayed ACKs. - * - * TODO: measure and adapt to the observed reordering delay, and - * use a timer to retransmit like the delayed early retransmit. */ reo_wnd = 1000; if (tp->rack.reord && tcp_min_rtt(tp) != ~0U) @@ -66,10 +65,23 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now) * A packet is lost if its elapsed time is beyond * the recent RTT plus the reordering window. 
*/ - if (skb_mstamp_us_delta(now, &skb->skb_mstamp) > - tp->rack.rtt_us + reo_wnd) { + u32 elapsed = skb_mstamp_us_delta(now, + &skb->skb_mstamp); + s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; + + if (remaining < 0) { tcp_rack_mark_skb_lost(sk, skb); + continue; } + + /* Skip ones marked lost but not yet retransmitted */ + if ((scb->sacked & TCPCB_LOST) && + !(scb->sacked & TCPCB_SACKED_RETRANS)) + continue; + + /* Record maximum wait time (+1 to avoid 0) */ + *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); + } else if (!(scb->sacked & TCPCB_RETRANS)) { /* Original data are sent sequentially so stop early * b/c the rest are all sent after rack_sent @@ -82,12 +94,19 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now) void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) { struct tcp_sock *tp = tcp_sk(sk); + u32 timeout; if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) return; + /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; - tcp_rack_detect_loss(sk, now); + tcp_rack_detect_loss(sk, now, &timeout); + if (timeout) { + timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, + timeout, inet_csk(sk)->icsk_rto); + } } /* Record the most recently (re)sent time among the (s)acked packets @@ -123,3 +142,27 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, tp->rack.mstamp = *xmit_time; tp->rack.advanced = 1; } + +/* We have waited long enough to accommodate reordering. Mark the expired + * packets lost and retransmit them. 
+ */ +void tcp_rack_reo_timeout(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct skb_mstamp now; + u32 timeout, prior_inflight; + + skb_mstamp_get(&now); + prior_inflight = tcp_packets_in_flight(tp); + tcp_rack_detect_loss(sk, &now, &timeout); + if (prior_inflight != tcp_packets_in_flight(tp)) { + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { + tcp_enter_recovery(sk, false); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_cwnd_reduction(sk, 1, 0); + } + tcp_xmit_retransmit_queue(sk); + } + if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) + tcp_rearm_rto(sk); +} diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 29a9bd5f1225..953c02a8566e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -563,6 +563,9 @@ void tcp_write_timer_handler(struct sock *sk) event = icsk->icsk_pending; switch (event) { + case ICSK_TIME_REO_TIMEOUT: + tcp_rack_reo_timeout(sk); + break; case ICSK_TIME_EARLY_RETRANS: tcp_resume_early_retransmit(sk); break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 228965dca3c5..f52c3742b404 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1746,6 +1746,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; -- cgit v1.2.3 From 1d0833df594390876647c54c2c88069d29059665 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:34 -0800 Subject: tcp: use sequence to break TS ties for RACK loss detection The packets inside a jumbo skb (e.g., TSO) share the same skb timestamp, even though they are sent sequentially on the wire. Since RACK is based on time, it can not detect some packets inside the same skb are lost. 
However, we can leverage the packet sequence numbers as extended timestamps to detect losses. Therefore, when RACK timestamp is identical to skb's timestamp (i.e., one of the packets of the skb is acked or sacked), we use the sequence numbers of the acked and unacked packets to break ties. We can use the same sequence logic to advance RACK xmit time as well to detect more losses and avoid timeout. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 5 +++-- net/ipv4/tcp_recovery.c | 17 ++++++++++++++--- 4 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1255c592719c..970d5f00589f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -208,6 +208,7 @@ struct tcp_sock { struct tcp_rack { struct skb_mstamp mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ + u32 end_seq; /* Ending TCP sequence of the skb */ u8 advanced; /* mstamp advanced since last lost marking */ u8 reord; /* reordering detected */ } rack; diff --git a/include/net/tcp.h b/include/net/tcp.h index 64fcdeb3358b..5fb1e75a32a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1867,7 +1867,7 @@ extern int sysctl_tcp_recovery; #define TCP_RACK_LOST_RETRANS 0x1 extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); -extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, +extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, const struct skb_mstamp *xmit_time, const struct skb_mstamp *ack_time); extern void tcp_rack_reo_timeout(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be1191829963..e42ca11c0326 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1218,7 +1218,8 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & 
TCPCB_SACKED_ACKED)) { - tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time); + tcp_rack_advance(tp, sacked, end_seq, + xmit_time, &state->ack_time); if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, @@ -3171,7 +3172,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } else if (tcp_is_sack(tp)) { tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, sacked, + tcp_rack_advance(tp, sacked, scb->end_seq, &skb->skb_mstamp, &sack->ack_time); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index eb39b1b6d1dc..1e330a2f913d 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -16,6 +16,14 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } +static bool tcp_rack_sent_after(const struct skb_mstamp *t1, + const struct skb_mstamp *t2, + u32 seq1, u32 seq2) +{ + return skb_mstamp_after(t1, t2) || + (t1->v64 == t2->v64 && after(seq1, seq2)); +} + /* Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK * but they look at different metrics: @@ -60,7 +68,8 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, scb->sacked & TCPCB_SACKED_ACKED) continue; - if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { + if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp, + tp->rack.end_seq, scb->end_seq)) { /* Step 3 in draft-cheng-tcpm-rack-00.txt: * A packet is lost if its elapsed time is beyond * the recent RTT plus the reordering window. 
@@ -113,14 +122,15 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from * draft-cheng-tcpm-rack-00.txt */ -void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, +void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, const struct skb_mstamp *xmit_time, const struct skb_mstamp *ack_time) { u32 rtt_us; if (tp->rack.mstamp.v64 && - !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) + !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp, + end_seq, tp->rack.end_seq)) return; rtt_us = skb_mstamp_us_delta(ack_time, xmit_time); @@ -140,6 +150,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, } tp->rack.rtt_us = rtt_us; tp->rack.mstamp = *xmit_time; + tp->rack.end_seq = end_seq; tp->rack.advanced = 1; } -- cgit v1.2.3 From a0370b3f3f2cfb8b424b04c0545414abaa53f5ee Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:36 -0800 Subject: tcp: enable RACK loss detection to trigger recovery This patch changes two things: 1. Start fast recovery with RACK in addition to other heuristics (e.g., DUPACK threshold, FACK). Prior to this change RACK is enabled to detect losses only after the recovery has started by other algorithms. 2. Disable TCP early retransmit. RACK subsumes the early retransmit with the new reordering timer feature. A later patch in this series removes the early retransmit code. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/tcp.h | 11 ++++------- net/ipv4/tcp_input.c | 29 +++++++++++++++++++++-------- net/ipv4/tcp_recovery.c | 16 ++++++++++------ 3 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5fb1e75a32a9..423438dd6fe9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,6 +262,9 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; +extern int sysctl_tcp_recovery; +#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ + extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; @@ -1043,6 +1046,7 @@ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) tp->do_early_retrans = sysctl_tcp_early_retrans && sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && + !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) && net->ipv4.sysctl_tcp_reordering == 3; } @@ -1859,13 +1863,6 @@ void tcp_v4_init(void); void tcp_init(void); /* tcp_recovery.c */ - -/* Flags to enable various loss recovery features. See below */ -extern int sysctl_tcp_recovery; - -/* Use TCP RACK to detect (some) tail and retransmit losses */ -#define TCP_RACK_LOST_RETRANS 0x1 - extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, const struct skb_mstamp *xmit_time, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c98dc874825..4ad75b8c4fee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2129,10 +2129,25 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * F.e. after RTO, when all the queue is considered as lost, * lost_out = packets_out and in_flight = retrans_out. 
* - * Essentially, we have now two algorithms counting + * Essentially, we have now a few algorithms detecting * lost packets. * - * FACK: It is the simplest heuristics. As soon as we decided + * If the receiver supports SACK: + * + * RFC6675/3517: It is the conventional algorithm. A packet is + * considered lost if the number of higher sequence packets + * SACKed is greater than or equal the DUPACK thoreshold + * (reordering). This is implemented in tcp_mark_head_lost and + * tcp_update_scoreboard. + * + * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm + * (2017-) that checks timing instead of counting DUPACKs. + * Essentially a packet is considered lost if it's not S/ACKed + * after RTT + reordering_window, where both metrics are + * dynamically measured and adjusted. This is implemented in + * tcp_rack_mark_lost. + * + * FACK: it is the simplest heuristics. As soon as we decided * that something is lost, we decide that _all_ not SACKed * packets until the most forward SACK are lost. I.e. * lost_out = fackets_out - sacked_out and left_out = fackets_out. @@ -2141,16 +2156,14 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * takes place. We use FACK by default until reordering * is suspected on the path to this destination. * - * NewReno: when Recovery is entered, we assume that one segment + * If the receiver does not support SACK: + * + * NewReno (RFC6582): in Recovery we assume that one segment * is lost (classic Reno). While we are in Recovery and * a partial ACK arrives, we assume that one more packet * is lost (NewReno). This heuristics are the same in NewReno * and SACK. * - * Imagine, that's all! Forget about all this shamanism about CWND inflation - * deflation etc. CWND is real congestion window, never inflated, changes - * only according to classic VJ rules. - * * Really tricky (and requiring careful tuning) part of algorithm * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). 
* The first determines the moment _when_ we should reduce CWND and, @@ -2807,7 +2820,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag, struct tcp_sock *tp = tcp_sk(sk); /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) { + if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { u32 prior_retrans = tp->retrans_out; tcp_rack_mark_lost(sk, ack_time); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 1e330a2f913d..4ecb38ae8504 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -1,7 +1,7 @@ #include #include -int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS; +int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION; static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { @@ -24,7 +24,9 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1, (t1->v64 == t2->v64 && after(seq1, seq2)); } -/* Marks a packet lost, if some packet sent later has been (s)acked. +/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): + * + * Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK * but they look at different metrics: * @@ -37,8 +39,10 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1, * is being more resilient to reordering by simply allowing some * "settling delay", instead of tweaking the dupthresh. * - * The current version is only used after recovery starts but can be - * easily extended to detect the first loss. + * When tcp_rack_detect_loss() detects some packets are lost and we + * are not already in the CA_Recovery state, either tcp_rack_reo_timeout() + * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will + * make us enter the CA_Recovery state. 
*/ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, u32 *reo_timeout) @@ -54,7 +58,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, * to queuing or delayed ACKs. */ reo_wnd = 1000; - if (tp->rack.reord && tcp_min_rtt(tp) != ~0U) + if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); tcp_for_write_queue(skb, sk) { @@ -105,7 +109,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) struct tcp_sock *tp = tcp_sk(sk); u32 timeout; - if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) + if (!tp->rack.advanced) return; /* Reset the advanced flag to avoid unnecessary queue scanning */ -- cgit v1.2.3 From bec41a11dd3dc8c54f766b4f494140ca92ba7c10 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:39 -0800 Subject: tcp: remove early retransmit This patch removes the support of RFC5827 early retransmit (i.e., fast recovery on small inflight with <3 dupacks) because it is subsumed by the new RACK loss detection. More specifically when RACK receives DUPACKs, it'll arm a reordering timer to start fast recovery after a quarter of (min)RTT, hence it covers the early retransmit except RACK does not limit itself to specific inflight or dupack numbers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 19 +++-------- include/linux/tcp.h | 3 +- include/net/tcp.h | 19 ----------- net/ipv4/inet_diag.c | 1 - net/ipv4/tcp.c | 3 -- net/ipv4/tcp_input.c | 60 ++-------------------------------- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_metrics.c | 1 - net/ipv4/tcp_minisocks.c | 1 - net/ipv4/tcp_output.c | 11 +++---- net/ipv4/tcp_timer.c | 3 -- net/ipv6/tcp_ipv6.c | 1 - 12 files changed, 12 insertions(+), 111 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 7dd65c9cf707..7de2cf79e16f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. tcp_early_retrans - INTEGER - Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold - for triggering fast retransmit when the amount of outstanding data is - small and when no previously unsent data can be transmitted (such - that limited transmit could be used). Also controls the use of - Tail loss probe (TLP) that converts RTOs occurring due to tail - losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01). + Tail loss probe (TLP) converts RTOs occurring due to tail + losses into fast recovery (draft-ietf-tcpm-rack). Note that + TLP requires RACK to function properly (see tcp_recovery below) Possible values: - 0 disables ER - 1 enables ER - 2 enables ER but delays fast recovery and fast retransmit - by a fourth of RTT. This mitigates connection falsely - recovers when network has a small degree of reordering - (less than 3 packets). - 3 enables delayed ER and TLP. - 4 enables TLP only. 
+ 0 disables TLP + 3 or 4 enables TLP Default: 3 tcp_ecn - INTEGER diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8e5f4c15d0e5..4733368f953a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,8 +224,7 @@ struct tcp_sock { repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; - u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - syn_data:1, /* SYN includes data */ + u8 syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 423438dd6fe9..c55d65f74f7f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); /* tcp_input.c */ -void tcp_resume_early_retransmit(struct sock *sk); void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); @@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; } -/* TCP early-retransmit (ER) is similar to but more conservative than - * the thin-dupack feature. Enable ER only if thin-dupack is disabled. 
- */ -static inline void tcp_enable_early_retrans(struct tcp_sock *tp) -{ - struct net *net = sock_net((struct sock *)tp); - - tp->do_early_retrans = sysctl_tcp_early_retrans && - sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) && - net->ipv4.sysctl_tcp_reordering == 3; -} - -static inline void tcp_disable_early_retrans(struct tcp_sock *tp) -{ - tp->do_early_retrans = 0; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index d216e40623d3..3828b3a805cd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c8d46c140b4a..d9023e8ed53e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk) tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; - tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EINVAL; else { tp->thin_dupack = val; - if (tp->thin_dupack) - tcp_disable_early_retrans(tp); } break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a041a92348ee..79c819077a59 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric, tcp_disable_fack(tp); } - if (metric > 0) - tcp_disable_early_retrans(tp); tp->rack.reord = 1; } @@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) return 
tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } -static bool tcp_pause_early_retransmit(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned long delay; - - /* Delay early retransmit and entering fast recovery for - * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples - * available, or RTO is scheduled to fire first. - */ - if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt_us) - return false; - - delay = max(usecs_to_jiffies(tp->srtt_us >> 5), - msecs_to_jiffies(2)); - - if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) - return false; - - inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, - TCP_RTO_MAX); - return true; -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) tcp_is_sack(tp) && !tcp_send_head(sk)) return true; - /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious - * retransmissions due to small network reorderings, we implement - * Mitigation A.3 in the RFC and delay the retransmission for a short - * interval if appropriate. 
- */ - if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && - !tcp_may_send_now(sk)) - return !tcp_pause_early_retransmit(sk, flag); - return false; } @@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = @@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk) } } -/* This function is called when the delayed ER timer fires. TCP enters - * fast recovery and performs fast-retransmit. - */ -void tcp_resume_early_retransmit(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_rearm_rto(sk); - - /* Stop if ER is disabled after the delayed ER timer is scheduled */ - if (!tp->do_early_retrans) - return; - - tcp_enter_recovery(sk, false); - tcp_update_scoreboard(sk, 1); - tcp_xmit_retransmit_queue(sk); -} - /* If we get here, the whole TSO packet has not been acked. 
*/ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) skb_mstamp_get(&sack_state.ack_time); - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ebf3e0c4967a..63214136cf1c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) int state; if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ba8f02d0f283..b9ed0d50aead 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk) val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) { tcp_disable_fack(tp); - tcp_disable_early_retrans(tp); tp->reordering = val; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 06fde26a82b7..bdb443471c39 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->sacked_out = 0; newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack.stamp_jiffies; newsk->sk_txhash = treq->txhash; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6327e4d368a4..9a1a1494b9dd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->snd_nxt = 
TCP_SKB_CB(skb)->end_seq; tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); - } NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); @@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) u32 timeout, tlp_time_stamp, rto_time_stamp; u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) - return false; /* No consecutive loss probes. */ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { tcp_rearm_rto(sk); @@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !tp->packets_out || - !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || + !tp->packets_out || !tcp_is_sack(tp) || + icsk->icsk_ca_state != TCP_CA_Open) return false; if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 953c02a8566e..40d893556e67 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk) case ICSK_TIME_REO_TIMEOUT: tcp_rack_reo_timeout(sk); break; - case ICSK_TIME_EARLY_RETRANS: - tcp_resume_early_retransmit(sk); - break; case ICSK_TIME_LOSS_PROBE: tcp_send_loss_probe(sk); break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f52c3742b404..fc14e04028bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1745,7 +1745,6 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending 
== ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; -- cgit v1.2.3 From cac2661c53f35cbe651bef9b07026a5a05ab8ce0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 17 Jan 2017 10:22:57 +0100 Subject: esp4: Avoid skb_cow_data whenever possible This patch tries to avoid skb_cow_data on esp4. On the encrypt side we add the IPsec tailbits to the linear part of the buffer if there is space on it. If there is no space on the linear part, we add a page fragment with the tailbits to the buffer and use separate src and dst scatterlists. On the decrypt side, we leave the buffer as it is if it is not cloned. With this, we can avoid a linearization of the buffer in most of the cases. Joint work with: Sowmini Varadhan Ilan Tayari Signed-off-by: Sowmini Varadhan Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 + net/ipv4/esp4.c | 338 +++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 266 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c52197cf51dc..d9a81dcef53e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,8 @@ struct xfrm_state { /* Last used time */ unsigned long lastused; + struct page_frag xfrag; + /* Reference to data common to all the instances of this * transformer. 
*/ const struct xfrm_type *type; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 20fb25e3027b..9e8d97133513 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -18,6 +18,8 @@ #include #include +#include + struct esp_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; @@ -92,11 +94,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } +static void esp_ssg_unref(struct xfrm_state *x, void *tmp) +{ + struct esp_output_extra *extra = esp_tmp_extra(tmp); + struct crypto_aead *aead = x->data; + int extralen = 0; + u8 *iv; + struct aead_request *req; + struct scatterlist *sg; + + if (x->props.flags & XFRM_STATE_ESN) + extralen += sizeof(*extra); + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + + /* Unref skb_frag_pages in the src scatterlist if necessary. + * Skip the first sg which comes from skb->data. + */ + if (req->src != req->dst) + for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + put_page(sg_page(sg)); +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + void *tmp; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; - kfree(ESP_SKB_CB(skb)->tmp); + tmp = ESP_SKB_CB(skb)->tmp; + esp_ssg_unref(x, tmp); + kfree(tmp); xfrm_output_resume(skb, err); } @@ -120,6 +151,29 @@ static void esp_output_restore_header(struct sk_buff *skb) sizeof(__be32)); } +static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, + struct ip_esp_hdr *esph, + struct esp_output_extra *extra) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. 
+ */ + if ((x->props.flags & XFRM_STATE_ESN)) { + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + } + + esph->spi = x->id.spi; + + return esph; +} + static void esp_output_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -130,16 +184,18 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { - int err; struct esp_output_extra *extra; + int err = -ENOMEM; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; - struct scatterlist *sg; + struct scatterlist *sg, *dsg; struct sk_buff *trailer; + struct page *page; void *tmp; u8 *iv; u8 *tail; + u8 *vaddr; int blksize; int clen; int alen; @@ -149,7 +205,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; int assoclen; int extralen; + int tailen; __be64 seqno; + __u8 proto = *skb_mac_header(skb); /* skb is pure payload to encrypt */ @@ -169,12 +227,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); plen = clen - skb->len - tfclen; - - err = skb_cow_data(skb, tfclen + plen + alen, &trailer); - if (err < 0) - goto error; - nfrags = err; - + tailen = tfclen + plen + alen; assoclen = sizeof(*esph); extralen = 0; @@ -183,35 +236,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, extralen); - if (!tmp) { - err = -ENOMEM; - goto error; - } - - extra = esp_tmp_extra(tmp); - iv = esp_tmp_iv(aead, tmp, extralen); - req = esp_tmp_req(aead, iv); - sg = esp_req_sg(aead, req); - - /* Fill padding... 
*/ - tail = skb_tail_pointer(trailer); - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = *skb_mac_header(skb); - pskb_put(skb, trailer, clen - skb->len + alen); - - skb_push(skb, -skb_network_offset(skb)); - esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; + esph = ip_esp_hdr(skb); /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -230,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len - skb_transport_offset(skb)); + uh->len = htons(skb->len + tailen + - skb_transport_offset(skb)); uh->check = 0; switch (encap_type) { @@ -248,31 +275,170 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_UDP; } - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + if (!skb_cloned(skb)) { + if (tailen <= skb_availroom(skb)) { + nfrags = 1; + trailer = skb; + tail = skb_tail_pointer(trailer); - aead_request_set_callback(req, 0, esp_output_done, skb); + goto skip_cow; + } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) + && !skb_has_frag_list(skb)) { + int allocsize; + struct sock *sk = skb->sk; + struct page_frag *pfrag = &x->xfrag; - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * encryption. 
- */ - if ((x->props.flags & XFRM_STATE_ESN)) { - extra->esphoff = (unsigned char *)esph - - skb_transport_header(skb); - esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); - extra->seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - aead_request_set_callback(req, 0, esp_output_done_esn, skb); + allocsize = ALIGN(tailen, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto cow; + } + + page = pfrag->page; + get_page(page); + + vaddr = kmap_atomic(page); + + tail = vaddr + pfrag->offset; + + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; + + kunmap_atomic(vaddr); + + nfrags = skb_shinfo(skb)->nr_frags; + + __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, + tailen); + skb_shinfo(skb)->nr_frags = ++nfrags; + + pfrag->offset = pfrag->offset + allocsize; + nfrags++; + + skb->len += tailen; + skb->data_len += tailen; + skb->truesize += tailen; + if (sk) + atomic_add(tailen, &sk->sk_wmem_alloc); + + skb_push(skb, -skb_network_offset(skb)); + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; + + tmp = esp_alloc_tmp(aead, nfrags + 2, extralen); + if (!tmp) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = &sg[nfrags]; + + esph = esp_output_set_extra(skb, esph, extra); + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } 
+ + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + spin_unlock_bh(&x->lock); + + goto skip_cow2; + } } +cow: + err = skb_cow_data(skb, tailen, &trailer); + if (err < 0) + goto error; + nfrags = err; + tail = skb_tail_pointer(trailer); + esph = ip_esp_hdr(skb); + +skip_cow: + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; + pskb_put(skb, trailer, clen - skb->len + alen); + + skb_push(skb, -skb_network_offset(skb)); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); esph->spi = x->id.spi; + tmp = esp_alloc_tmp(aead, nfrags, extralen); + if (!tmp) { + err = -ENOMEM; + goto error; + } + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = sg; + + esph = esp_output_set_extra(skb, esph, extra); + sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, assoclen + ivlen + clen + alen); - aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); +skip_cow2: + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_output_done, skb); + + aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + @@ -298,6 +464,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp_output_restore_header(skb); } + if (sg != dsg) + esp_ssg_unref(x, tmp); kfree(tmp); 
error: @@ -401,6 +569,23 @@ static void esp_input_restore_header(struct sk_buff *skb) __skb_pull(skb, 4); } +static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + } +} + static void esp_input_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -437,12 +622,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) if (elen <= 0) goto out; - err = skb_cow_data(skb, 0, &trailer); - if (err < 0) - goto out; - - nfrags = err; - assoclen = sizeof(*esph); seqhilen = 0; @@ -451,6 +630,26 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } + if (!skb_cloned(skb)) { + if (!skb_is_nonlinear(skb)) { + nfrags = 1; + + goto skip_cow; + } else if (!skb_has_frag_list(skb)) { + nfrags = skb_shinfo(skb)->nr_frags; + nfrags++; + + goto skip_cow; + } + } + + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) + goto out; + + nfrags = err; + +skip_cow: err = -ENOMEM; tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) @@ -462,26 +661,17 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - skb->ip_summed = CHECKSUM_NONE; + esp_input_set_header(skb, seqhi); - esph = (struct ip_esp_hdr *)skb->data; + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); - aead_request_set_callback(req, 0, esp_input_done, skb); + skb->ip_summed = CHECKSUM_NONE; - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * decryption. 
- */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); - *seqhi = esph->spi; - esph->spi = esph->seq_no; - esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_input_done_esn, skb); - } - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + else + aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); -- cgit v1.2.3 From aefb4d4ad83b608cb8e0cab8d3cd8e57d3f91feb Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Mon, 16 Jan 2017 14:16:36 +0000 Subject: net: AF-specific RTM_GETSTATS attributes Add the functionality for including address-family-specific per-link stats in RTM_GETSTATS messages. This is done through adding a new IFLA_STATS_AF_SPEC attribute under which address family attributes are nested and then the AF-specific attributes can be further nested. This follows the model of IFLA_AF_SPEC on RTM_*LINK messages and it has the advantage of presenting an easily extended hierarchy. The rtnl_af_ops structure is extended to provide AFs with the opportunity to fill and provide the size of their stats attributes. One alternative would have been to provide AFs with the ability to add attributes directly into the RTM_GETSTATS message without a nested hierarchy. I discounted this approach as it increases the rate at which the 32 attribute number space is used up and it makes implementation a little more tricky for stats dump resuming (at the moment the order in which attributes are added to the message has to match the numeric order of the attributes). Another alternative would have been to register per-AF RTM_GETSTATS handlers. I discounted this approach as I perceived a common use-case to be getting all the stats for an interface and this approach would necessitate multiple requests/dumps to retrieve them all. 
Signed-off-by: Robert Shearman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 4113916cc1bb..106de5f7bf06 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -139,6 +139,10 @@ struct rtnl_af_ops { const struct nlattr *attr); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr); + + int (*fill_stats_af)(struct sk_buff *skb, + const struct net_device *dev); + size_t (*get_stats_af_size)(const struct net_device *dev); }; void __rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6b13e591abc9..184b16ed2b84 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -847,6 +847,7 @@ enum { IFLA_STATS_LINK_XSTATS, IFLA_STATS_LINK_XSTATS_SLAVE, IFLA_STATS_LINK_OFFLOAD_XSTATS, + IFLA_STATS_AF_SPEC, __IFLA_STATS_MAX, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 75e3ea7bda08..f538f764fca6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3829,6 +3829,39 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, *idxattr = 0; } + if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) { + struct rtnl_af_ops *af_ops; + + *idxattr = IFLA_STATS_AF_SPEC; + attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC); + if (!attr) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_stats_af) { + struct nlattr *af; + int err; + + af = nla_nest_start(skb, af_ops->family); + if (!af) + goto nla_put_failure; + + err = af_ops->fill_stats_af(skb, dev); + + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + 
nla_nest_end(skb, attr); + + *idxattr = 0; + } + nlmsg_end(skb, nlh); return 0; @@ -3885,6 +3918,23 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) size += rtnl_get_offload_stats_size(dev); + if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) { + struct rtnl_af_ops *af_ops; + + /* for IFLA_STATS_AF_SPEC */ + size += nla_total_size(0); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_stats_af_size) { + size += nla_total_size( + af_ops->get_stats_af_size(dev)); + + /* for AF_* */ + size += nla_total_size(0); + } + } + } + return size; } -- cgit v1.2.3 From fe38d2a1c8bee0b3a0be40de5b621a28200612e5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Jan 2017 07:51:01 -0800 Subject: inet: collapse ipv4/v6 rcv_saddr_equal functions into one We pass these per-protocol equal functions around in various places, but we can just have one function that checks the sk->sk_family and then do the right comparison function. I've also changed the ipv4 version to not cast to inet_sock since it is unneeded. Signed-off-by: Josef Bacik Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 4 +-- include/net/inet_hashtables.h | 5 +-- include/net/udp.h | 1 - net/ipv4/inet_connection_sock.c | 72 ++++++++++++++++++++++++++++++++++++++++ net/ipv4/inet_hashtables.c | 16 +++------ net/ipv4/udp.c | 58 +++++++------------------------- net/ipv6/inet6_connection_sock.c | 4 +-- net/ipv6/inet6_hashtables.c | 46 +------------------------ net/ipv6/udp.c | 2 +- 9 files changed, 95 insertions(+), 113 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8f998afc1384..17c6fd84e287 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -88,9 +88,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, +int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 0574493e3899..756ed1692906 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -203,10 +203,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); -int __inet_hash(struct sock *sk, struct sock *osk, - int (*saddr_same)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)); +int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index 1661791e8ca1..c9d8b8e848e0 100644 --- 
a/include/net/udp.h +++ b/include/net/udp.h @@ -204,7 +204,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock *, const struct sock *, bool), unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 19ea045c50ed..ba597cb504ff 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -31,6 +31,78 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif +#if IS_ENABLED(CONFIG_IPV6) +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) +{ + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; + + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return 1; + + if (addr_type == IPV6_ADDR_ANY && match_wildcard && + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) + return 1; + + if (sk2_rcv_saddr6 && + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) + return 1; + + return 0; +} +#endif + +/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 + */ +static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) +{ + if (!ipv6_only_sock(sk2)) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; +} + +int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard); +#endif + return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard); +} +EXPORT_SYMBOL(inet_rcv_saddr_equal); + void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ca97835bfec4..2ef9b010bd34 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -435,10 +435,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) EXPORT_SYMBOL_GPL(inet_ehash_nolisten); static int 
inet_reuseport_add_sock(struct sock *sk, - struct inet_listen_hashbucket *ilb, - int (*saddr_same)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)) + struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; @@ -451,7 +448,7 @@ static int inet_reuseport_add_sock(struct sock *sk, sk2->sk_bound_dev_if == sk->sk_bound_dev_if && inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && - saddr_same(sk, sk2, false)) + inet_rcv_saddr_equal(sk, sk2, false)) return reuseport_add_sock(sk, sk2); } @@ -461,10 +458,7 @@ static int inet_reuseport_add_sock(struct sock *sk, return 0; } -int __inet_hash(struct sock *sk, struct sock *osk, - int (*saddr_same)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)) +int __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; @@ -479,7 +473,7 @@ int __inet_hash(struct sock *sk, struct sock *osk, spin_lock(&ilb->lock); if (sk->sk_reuseport) { - err = inet_reuseport_add_sock(sk, ilb, saddr_same); + err = inet_reuseport_add_sock(sk, ilb); if (err) goto unlock; } @@ -503,7 +497,7 @@ int inet_hash(struct sock *sk) if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal); + err = __inet_hash(sk, NULL); local_bh_enable(); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4318d72e0248..d6dddcf59e79 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -137,11 +137,7 @@ EXPORT_SYMBOL(udp_memory_allocated); static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, - struct sock *sk, - int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard), - unsigned int log) + struct sock *sk, unsigned int log) { struct sock *sk2; kuid_t uid = sock_i_uid(sk); @@ -153,7 +149,7 @@ static int 
udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - saddr_comp(sk, sk2, true)) { + inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && uid_eq(uid, sock_i_uid(sk2))) { @@ -176,10 +172,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, */ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, - struct sock *sk, - int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)) + struct sock *sk) { struct sock *sk2; kuid_t uid = sock_i_uid(sk); @@ -193,7 +186,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - saddr_comp(sk, sk2, true)) { + inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && uid_eq(uid, sock_i_uid(sk2))) { @@ -208,10 +201,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, return res; } -static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, - int (*saddr_same)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)) +static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) { struct net *net = sock_net(sk); kuid_t uid = sock_i_uid(sk); @@ -225,7 +215,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) && (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && - (*saddr_same)(sk, sk2, false)) { + inet_rcv_saddr_equal(sk, sk2, false)) { return reuseport_add_sock(sk, sk2); } } @@ -241,14 +231,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct 
udp_hslot *hslot, * * @sk: socket struct in question * @snum: port number to look up - * @saddr_comp: AF-dependent comparison of bound local IP addresses * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard), unsigned int hash2_nulladdr) { struct udp_hslot *hslot, *hslot2; @@ -277,7 +263,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, - saddr_comp, udptable->log); + udptable->log); snum = first; /* @@ -310,12 +296,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (hslot->count < hslot2->count) goto scan_primary_hash; - exist = udp_lib_lport_inuse2(net, snum, hslot2, - sk, saddr_comp); + exist = udp_lib_lport_inuse2(net, snum, hslot2, sk); if (!exist && (hash2_nulladdr != slot2)) { hslot2 = udp_hashslot2(udptable, hash2_nulladdr); exist = udp_lib_lport_inuse2(net, snum, hslot2, - sk, saddr_comp); + sk); } if (exist) goto fail_unlock; @@ -323,8 +308,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, goto found; } scan_primary_hash: - if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, - saddr_comp, 0)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0)) goto fail_unlock; } found: @@ -333,7 +317,7 @@ found: udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { if (sk->sk_reuseport && - udp_reuseport_add_sock(sk, hslot, saddr_comp)) { + udp_reuseport_add_sock(sk, hslot)) { inet_sk(sk)->inet_num = 0; udp_sk(sk)->udp_port_hash = 0; udp_sk(sk)->udp_portaddr_hash ^= snum; @@ -365,24 +349,6 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses - * match_wildcard == false: addresses must be exactly the same, i.e. 
- * 0.0.0.0 only equals to 0.0.0.0 - */ -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, - bool match_wildcard) -{ - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - - if (!ipv6_only_sock(sk2)) { - if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr) - return 1; - if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr) - return match_wildcard; - } - return 0; -} - static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) { @@ -398,7 +364,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; - return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); + return udp_lib_get_port(sk, snum, hash2_nulladdr); } static int compute_score(struct sock *sk, struct net *net, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7396e75e161b..55ee2ea2aee0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,12 +54,12 @@ int inet6_csk_bind_conflict(const struct sock *sk, (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2, true)) + if (inet_rcv_saddr_equal(sk, sk2, true)) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2, true)) + inet_rcv_saddr_equal(sk, sk2, true)) break; } } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 02761c9fe43e..d0900918a19e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -268,54 +268,10 @@ int inet6_hash(struct sock *sk) if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + err = __inet_hash(sk, NULL); local_bh_enable(); } return err; } EXPORT_SYMBOL_GPL(inet6_hash); - -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 
addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only - */ -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) -{ - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { - if (!sk2_ipv6only) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) - return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) - return match_wildcard; - } - return 0; - } - - if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && match_wildcard && - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) - return 1; - - return 0; -} -EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4d5c4eee4b3f..05d69324862e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -103,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; - return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); + return udp_lib_get_port(sk, snum, hash2_nulladdr); } static void udp_v6_rehash(struct sock *sk) -- cgit v1.2.3 From aa078842b702b4a45111f028a604a6c8f69cb27d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Jan 2017 07:51:02 -0800 Subject: inet: drop ->bind_conflict The only difference between 
inet6_csk_bind_conflict and inet_csk_bind_conflict is how they check the rcv_saddr, so delete this call back and simply change inet_csk_bind_conflict to call inet_rcv_saddr_equal. Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 5 ----- include/net/inet_connection_sock.h | 6 ------ net/dccp/ipv4.c | 1 - net/dccp/ipv6.c | 2 -- net/ipv4/inet_connection_sock.c | 22 +++++++------------- net/ipv4/tcp_ipv4.c | 1 - net/ipv6/inet6_connection_sock.c | 40 ------------------------------------- net/ipv6/tcp_ipv6.c | 2 -- 8 files changed, 7 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 3212b39b5bfc..8ec87b62257b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,16 +15,11 @@ #include -struct inet_bind_bucket; struct request_sock; struct sk_buff; struct sock; struct sockaddr; -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool soreuseport_ok); - struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 84b2edde09b1..826f198374f8 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -62,9 +62,6 @@ struct inet_connection_sock_af_ops { char __user *optval, int __user *optlen); #endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); - int (*bind_conflict)(const struct sock *sk, - const struct inet_bind_bucket *tb, - bool relax, bool soreuseport_ok); void (*mtu_reduced)(struct sock *sk); }; @@ -263,9 +260,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); -int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool 
relax, - bool soreuseport_ok); int inet_csk_get_port(struct sock *sk, unsigned short snum); struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d859a5c36e70..b043ec833785 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -904,7 +904,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), - .bind_conflict = inet_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index adfc790f7193..08bcdc3d1717 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -937,7 +937,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@ -958,7 +957,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ba597cb504ff..a1c9055769fc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -116,9 +116,9 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) } EXPORT_SYMBOL(inet_get_local_port_range); -int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool reuseport_ok) +static int 
inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb, + bool relax, bool reuseport_ok) { struct sock *sk2; bool reuse = sk->sk_reuse; @@ -134,7 +134,6 @@ int inet_csk_bind_conflict(const struct sock *sk, sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && - !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { @@ -144,23 +143,18 @@ int inet_csk_bind_conflict(const struct sock *sk, rcu_access_pointer(sk->sk_reuseport_cb) || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { - - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || - sk2->sk_rcv_saddr == sk->sk_rcv_saddr) + if (inet_rcv_saddr_equal(sk, sk2, true)) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || - sk2->sk_rcv_saddr == sk->sk_rcv_saddr) + if (inet_rcv_saddr_equal(sk, sk2, true)) break; } } } return sk2 != NULL; } -EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
@@ -239,8 +233,7 @@ other_parity_scan: smallest_size = tb->num_owners; smallest_port = port; } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false, - reuseport_ok)) + if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok)) goto tb_found; goto next_port; } @@ -281,8 +274,7 @@ tb_found: sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) goto success; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true, - reuseport_ok)) { + if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) { if ((reuse || (tb->fastreuseport > 0 && sk->sk_reuseport && diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 63214136cf1c..3644fc117691 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1817,7 +1817,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), - .bind_conflict = inet_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 55ee2ea2aee0..97074c459fe6 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -28,46 +28,6 @@ #include #include -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool reuseport_ok) -{ - const struct sock *sk2; - bool reuse = !!sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; - kuid_t uid = sock_i_uid((struct sock *)sk); - - /* We must walk the whole port owner list in this case. -DaveM */ - /* - * See comment in inet_csk_bind_conflict about sock lookup - * vs net namespaces issues. 
- */ - sk_for_each_bound(sk2, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, - sock_i_uid((struct sock *)sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && - sk2->sk_state != TCP_LISTEN && - inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - } - - return sk2 != NULL; -} -EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); - struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fc14e04028bf..f72100eedd5d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1621,7 +1621,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@ -1652,7 +1651,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, -- cgit v1.2.3 From b9470c27607bed1ad3450de789c154f225530112 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Jan 2017 07:51:03 -0800 Subject: inet: kill smallest_size and smallest_port In inet_csk_get_port we seem to be using smallest_port to figure out where the best place to look for a SO_REUSEPORT sk that matches with an existing 
set of SO_REUSEPORT's. However if we get to the logic if (smallest_size != -1) { port = smallest_port; goto have_port; } we will do a useless search, because we would have already done the inet_csk_bind_conflict for that port and it would have returned 1, otherwise we would have gone to found_tb and succeeded. Since this logic makes us do yet another trip through inet_csk_bind_conflict for a port we know won't work just delete this code and save us the time. Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 1 - net/ipv4/inet_connection_sock.c | 26 ++++---------------------- net/ipv4/inet_hashtables.c | 3 --- 3 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 756ed1692906..3fc0366743da 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -80,7 +80,6 @@ struct inet_bind_bucket { signed char fastreuse; signed char fastreuseport; kuid_t fastuid; - int num_owners; struct hlist_node node; struct hlist_head owners; }; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a1c9055769fc..d3523661c905 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -165,7 +165,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; int ret = 1, attempts = 5, port = snum; - int smallest_size = -1, smallest_port; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); int i, low, high, attempt_half; @@ -175,7 +174,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) bool reuseport_ok = !!snum; if (port) { -have_port: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); @@ -209,8 +207,6 @@ other_half_scan: * We do the opposite to not pollute connect() users. 
*/ offset |= 1U; - smallest_size = -1; - smallest_port = low; /* avoid compiler warning */ other_parity_scan: port = low + offset; @@ -224,15 +220,6 @@ other_parity_scan: spin_lock_bh(&head->lock); inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == port) { - if (((tb->fastreuse > 0 && reuse) || - (tb->fastreuseport > 0 && - sk->sk_reuseport && - !rcu_access_pointer(sk->sk_reuseport_cb) && - uid_eq(tb->fastuid, uid))) && - (tb->num_owners < smallest_size || smallest_size == -1)) { - smallest_size = tb->num_owners; - smallest_port = port; - } if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok)) goto tb_found; goto next_port; @@ -243,10 +230,6 @@ next_port: cond_resched(); } - if (smallest_size != -1) { - port = smallest_port; - goto have_port; - } offset--; if (!(offset & 1)) goto other_parity_scan; @@ -268,19 +251,18 @@ tb_found: if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (((tb->fastreuse > 0 && reuse) || + if ((tb->fastreuse > 0 && reuse) || (tb->fastreuseport > 0 && !rcu_access_pointer(sk->sk_reuseport_cb) && - sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && - smallest_size == -1) + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) goto success; if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) { if ((reuse || (tb->fastreuseport > 0 && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && - uid_eq(tb->fastuid, uid))) && - !snum && smallest_size != -1 && --attempts >= 0) { + uid_eq(tb->fastuid, uid))) && !snum && + --attempts >= 0) { spin_unlock_bh(&head->lock); goto again; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2ef9b010bd34..8bea74298173 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -73,7 +73,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; - tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); } @@ 
-96,7 +95,6 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, { inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); - tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb; } @@ -114,7 +112,6 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); - tb->num_owners--; inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); -- cgit v1.2.3 From 637bc8bbe6c0a288a596edfdcdd5657c72a848db Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Jan 2017 07:51:06 -0800 Subject: inet: reset tb->fastreuseport when adding a reuseport sk If we have non reuseport sockets on a tb we will set tb->fastreuseport to 0 and never set it again. Which means that in the future if we end up adding a bunch of reuseport sk's to that tb we'll have to do the expensive scan every time. Instead add the ipv4/ipv6 saddr fields to the bind bucket, as well as the family so we know what comparison to make, and the ipv6 only setting so we can make sure to compare with new sockets appropriately. Once one sk has made it onto the list we know that there are no potential bind conflicts on the owners list that match that sk's rcv_addr. So copy the sk's information into our bind bucket and set tb->fastreuseport to FASTREUSEPORT_STRICT so we know we have to do an extra check for subsequent reuseport sockets and skip the expensive bind conflict check. Signed-off-by: Josef Bacik Signed-off-by: David S.
Miller --- include/net/inet_hashtables.h | 9 ++++ net/ipv4/inet_connection_sock.c | 106 ++++++++++++++++++++++++++++++++-------- 2 files changed, 95 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3fc0366743da..1178931288cb 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -74,12 +74,21 @@ struct inet_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */ +#define FASTREUSEPORT_ANY 1 +#define FASTREUSEPORT_STRICT 2 + struct inet_bind_bucket { possible_net_t ib_net; unsigned short port; signed char fastreuse; signed char fastreuseport; kuid_t fastuid; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr fast_v6_rcv_saddr; +#endif + __be32 fast_rcv_saddr; + unsigned short fast_sk_family; + bool fast_ipv6_only; struct hlist_node node; struct hlist_head owners; }; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bbe28920e2d8..096a085611ab 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -38,20 +38,21 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, * and 0.0.0.0 equals to 0.0.0.0 only */ -static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, +static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, + const struct in6_addr *sk2_rcv_saddr6, + __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk1_ipv6only, bool sk2_ipv6only, bool match_wildcard) { - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { if (!sk2_ipv6only) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + if (sk1_rcv_saddr == sk2_rcv_saddr) return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } return 0; @@ -65,11 +66,11 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, return 1; if (addr_type == IPV6_ADDR_ANY && match_wildcard && - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) + !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) + ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) return 1; return 0; @@ -80,13 +81,13 @@ static int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, * match_wildcard == false: addresses must be exactly the same, i.e. * 0.0.0.0 only equals to 0.0.0.0 */ -static int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) +static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk2_ipv6only, bool match_wildcard) { - if (!ipv6_only_sock(sk2)) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + if (!sk2_ipv6only) { + if (sk1_rcv_saddr == sk2_rcv_saddr) return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } return 0; @@ -97,9 +98,16 @@ int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - return ipv6_rcv_saddr_equal(sk, sk2, match_wildcard); + return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr, + &sk2->sk_v6_rcv_saddr, + sk->sk_rcv_saddr, + sk2->sk_rcv_saddr, + ipv6_only_sock(sk), + ipv6_only_sock(sk2), + match_wildcard); #endif - return ipv4_rcv_saddr_equal(sk, sk2, match_wildcard); + return 
ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, + ipv6_only_sock(sk2), match_wildcard); } EXPORT_SYMBOL(inet_rcv_saddr_equal); @@ -234,6 +242,39 @@ success: return head; } +static inline int sk_reuseport_match(struct inet_bind_bucket *tb, + struct sock *sk) +{ + kuid_t uid = sock_i_uid(sk); + + if (tb->fastreuseport <= 0) + return 0; + if (!sk->sk_reuseport) + return 0; + if (rcu_access_pointer(sk->sk_reuseport_cb)) + return 0; + if (!uid_eq(tb->fastuid, uid)) + return 0; + /* We only need to check the rcv_saddr if this tb was once marked + * without fastreuseport and then was reset, as we can only know that + * the fast_*rcv_saddr doesn't have any conflicts with the socks on the + * owners list. + */ + if (tb->fastreuseport == FASTREUSEPORT_ANY) + return 1; +#if IS_ENABLED(CONFIG_IPV6) + if (tb->fast_sk_family == AF_INET6) + return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, + &sk->sk_v6_rcv_saddr, + tb->fast_rcv_saddr, + sk->sk_rcv_saddr, + tb->fast_ipv6_only, + ipv6_only_sock(sk), true); +#endif + return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, + ipv6_only_sock(sk), true); +} + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
* We try to allocate an odd port (and leave even ports for connect()) @@ -273,9 +314,7 @@ tb_found: goto success; if ((tb->fastreuse > 0 && reuse) || - (tb->fastreuseport > 0 && - !rcu_access_pointer(sk->sk_reuseport_cb) && - sk->sk_reuseport && uid_eq(tb->fastuid, uid))) + sk_reuseport_match(tb, sk)) goto success; if (inet_csk_bind_conflict(sk, tb, true, true)) goto fail_unlock; @@ -284,16 +323,43 @@ success: if (!hlist_empty(&tb->owners)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { - tb->fastreuseport = 1; + tb->fastreuseport = FASTREUSEPORT_ANY; tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif } else { tb->fastreuseport = 0; } } else { if (!reuse) tb->fastreuse = 0; - if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)) + if (sk->sk_reuseport) { + /* We didn't match or we don't have fastreuseport set on + * the tb, but we have sk_reuseport set on this socket + * and we know that there are no bind conflicts with + * this socket in this tb, so reset our tb's reuseport + * settings so that any subsequent sockets that match + * our current socket will be put on the fast path. + * + * If we reset we need to set FASTREUSEPORT_STRICT so we + * do extra checking for all subsequent sk_reuseport + * socks. 
+ */ + if (!sk_reuseport_match(tb, sk)) { + tb->fastreuseport = FASTREUSEPORT_STRICT; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } + } else { tb->fastreuseport = 0; + } } if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, port); -- cgit v1.2.3 From cc16f00f6529aa2378f2b949a6f68e9dc6dec363 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:42 +0800 Subject: sctp: add support for generating stream reconf ssn reset request chunk This patch is to add asoc strreset_outseq and strreset_inseq for saving the reconf request sequence, initialize them when create assoc and process init, and also to define Incoming and Outgoing SSN Reset Request Parameter described in rfc6525 section 4.1 and 4.2, As they can be in one same chunk as section rfc6525 3.1-3 describes, it makes them in one function. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 27 ++++++++++ include/net/sctp/sm.h | 5 +- include/net/sctp/structs.h | 3 ++ net/sctp/associola.c | 1 + net/sctp/sm_make_chunk.c | 121 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 156 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index fcb4c3646173..a9e790685af3 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -108,6 +108,7 @@ typedef enum { /* Use hex, as defined in ADDIP sec. 3.1 */ SCTP_CID_ASCONF = 0xC1, SCTP_CID_ASCONF_ACK = 0x80, + SCTP_CID_RECONF = 0x82, } sctp_cid_t; /* enum */ @@ -199,6 +200,13 @@ typedef enum { SCTP_PARAM_SUCCESS_REPORT = cpu_to_be16(0xc005), SCTP_PARAM_ADAPTATION_LAYER_IND = cpu_to_be16(0xc006), + /* RE-CONFIG. 
Section 4 */ + SCTP_PARAM_RESET_OUT_REQUEST = cpu_to_be16(0x000d), + SCTP_PARAM_RESET_IN_REQUEST = cpu_to_be16(0x000e), + SCTP_PARAM_RESET_TSN_REQUEST = cpu_to_be16(0x000f), + SCTP_PARAM_RESET_RESPONSE = cpu_to_be16(0x0010), + SCTP_PARAM_RESET_ADD_OUT_STREAMS = cpu_to_be16(0x0011), + SCTP_PARAM_RESET_ADD_IN_STREAMS = cpu_to_be16(0x0012), } sctp_param_t; /* enum */ @@ -710,4 +718,23 @@ struct sctp_infox { struct sctp_association *asoc; }; +struct sctp_reconf_chunk { + sctp_chunkhdr_t chunk_hdr; + __u8 params[0]; +} __packed; + +struct sctp_strreset_outreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u32 response_seq; + __u32 send_reset_at_tsn; + __u16 list_of_streams[0]; +} __packed; + +struct sctp_strreset_inreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u16 list_of_streams[0]; +} __packed; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index ca6c971dd74a..3462cb08f51a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -259,7 +259,10 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); - +struct sctp_chunk *sctp_make_strreset_req( + const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in); void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4741ec240caf..3dc983e97564 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1865,6 +1865,9 @@ struct sctp_association { temp:1, /* Is it a temporary association? 
*/ prsctp_enable:1; + __u32 strreset_outseq; /* Update after receiving response */ + __u32 strreset_inseq; /* Update after receiving request */ + struct sctp_priv_assoc_stats stats; int sent_cnt_removable; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 36294f7fb9a7..42ece6f35b98 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -207,6 +207,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * association to the same value as the initial TSN. */ asoc->addip_serial = asoc->c.initial_tsn; + asoc->strreset_outseq = asoc->c.initial_tsn; INIT_LIST_HEAD(&asoc->addip_chunk_list); INIT_LIST_HEAD(&asoc->asconf_ack_list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 80a9088084ac..df81fce08890 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1844,6 +1844,7 @@ no_hmac: retval->next_tsn = retval->c.initial_tsn; retval->ctsn_ack_point = retval->next_tsn - 1; retval->addip_serial = retval->c.initial_tsn; + retval->strreset_outseq = retval->c.initial_tsn; retval->adv_peer_ack_point = retval->ctsn_ack_point; retval->peer.prsctp_capable = retval->c.prsctp_capable; retval->peer.adaptation_ind = retval->c.adaptation_ind; @@ -2387,6 +2388,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, asoc->peer.i.initial_tsn = ntohl(peer_init->init_hdr.initial_tsn); + asoc->strreset_inseq = asoc->peer.i.initial_tsn; + /* Apply the upper bounds for output streams based on peer's * number of inbound streams. 
*/ @@ -3524,3 +3527,121 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, return retval; } + +/* RE-CONFIG 3.1 (RE-CONFIG chunk) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 130 | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Re-configuration Parameter / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Re-configuration Parameter (optional) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +static struct sctp_chunk *sctp_make_reconf( + const struct sctp_association *asoc, + int length) +{ + struct sctp_reconf_chunk *reconf; + struct sctp_chunk *retval; + + retval = sctp_make_control(asoc, SCTP_CID_RECONF, 0, length, + GFP_ATOMIC); + if (!retval) + return NULL; + + reconf = (struct sctp_reconf_chunk *)retval->chunk_hdr; + retval->param_hdr.v = reconf->params; + + return retval; +} + +/* RE-CONFIG 4.1 (STREAM OUT RESET) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 13 | Parameter Length = 16 + 2 * N | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Response Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sender's Last Assigned TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Number 1 (optional) | Stream Number 2 (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / ...... 
/ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Number N-1 (optional) | Stream Number N (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * RE-CONFIG 4.2 (STREAM IN RESET) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 14 | Parameter Length = 8 + 2 * N | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Number 1 (optional) | Stream Number 2 (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / ...... / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Number N-1 (optional) | Stream Number N (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_req( + const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in) +{ + struct sctp_strreset_outreq outreq; + __u16 stream_len = stream_num * 2; + struct sctp_strreset_inreq inreq; + struct sctp_chunk *retval; + __u16 outlen, inlen, i; + + outlen = (sizeof(outreq) + stream_len) * out; + inlen = (sizeof(inreq) + stream_len) * in; + + retval = sctp_make_reconf(asoc, outlen + inlen); + if (!retval) + return NULL; + + for (i = 0; i < stream_num; i++) + stream_list[i] = htons(stream_list[i]); + + if (outlen) { + outreq.param_hdr.type = SCTP_PARAM_RESET_OUT_REQUEST; + outreq.param_hdr.length = htons(outlen); + outreq.request_seq = htonl(asoc->strreset_outseq); + outreq.response_seq = htonl(asoc->strreset_inseq - 1); + outreq.send_reset_at_tsn = htonl(asoc->next_tsn - 1); + + sctp_addto_chunk(retval, sizeof(outreq), &outreq); + + if (stream_len) + sctp_addto_chunk(retval, stream_len, stream_list); + } + + if 
(inlen) { + inreq.param_hdr.type = SCTP_PARAM_RESET_IN_REQUEST; + inreq.param_hdr.length = htons(inlen); + inreq.request_seq = htonl(asoc->strreset_outseq + out); + + sctp_addto_chunk(retval, sizeof(inreq), &inreq); + + if (stream_len) + sctp_addto_chunk(retval, stream_len, stream_list); + } + + for (i = 0; i < stream_num; i++) + stream_list[i] = ntohs(stream_list[i]); + + return retval; +} -- cgit v1.2.3 From 7b9438de0cd4b46a6914416bfede6cf839cd9e68 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:43 +0800 Subject: sctp: add stream reconf timer This patch is to add a per transport timer based on sctp timer frame for stream reconf chunk retransmission. It would start after sending a reconf request chunk, and stop after receiving the response chunk. If the timer expires, besides retransmitting the reconf request chunk, it would also do the same thing as the data RTO timer, such as increasing the appropriate error counts and performing threshold management, possibly destroying the asoc if sctp retransmission thresholds are exceeded, just as section 5.1.1 describes. This patch is also to add asoc strreset_chunk, it is used to save the reconf request chunk, so that it can be retransmitted, and to check if the response is really for this request by comparing the information inside with the response chunk as well. Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- include/net/sctp/constants.h | 1 + include/net/sctp/sm.h | 2 ++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 9 +++++++++ net/sctp/sm_sideeffect.c | 32 ++++++++++++++++++++++++++++++++ net/sctp/sm_statefuns.c | 28 ++++++++++++++++++++++++++++ net/sctp/sm_statetable.c | 20 ++++++++++++++++++++ net/sctp/transport.c | 17 +++++++++++++++-- 8 files changed, 113 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5b847e49f7e9..8307c862b5c2 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -90,6 +90,7 @@ typedef enum { SCTP_EVENT_TIMEOUT_T4_RTO, SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD, SCTP_EVENT_TIMEOUT_HEARTBEAT, + SCTP_EVENT_TIMEOUT_RECONF, SCTP_EVENT_TIMEOUT_SACK, SCTP_EVENT_TIMEOUT_AUTOCLOSE, } sctp_event_timeout_t; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 3462cb08f51a..d2d9e28fe783 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -167,6 +167,7 @@ sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort; /* Prototypes for timeout event state functions. 
*/ sctp_state_fn_t sctp_sf_do_6_3_3_rtx; +sctp_state_fn_t sctp_sf_send_reconf; sctp_state_fn_t sctp_sf_do_6_2_sack; sctp_state_fn_t sctp_sf_autoclose_timer_expire; @@ -278,6 +279,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); void sctp_generate_heartbeat_event(unsigned long peer); +void sctp_generate_reconf_event(unsigned long peer); void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3dc983e97564..463b4d642d68 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -877,6 +877,9 @@ struct sctp_transport { /* Timer to handle ICMP proto unreachable envets */ struct timer_list proto_unreach_timer; + /* Timer to handler reconf chunk rtx */ + struct timer_list reconf_timer; + /* Since we're using per-destination retransmission timers * (see above), we're also using per-destination "transmitted" * queues. 
This probably ought to be a private struct @@ -935,6 +938,7 @@ void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk); void sctp_transport_free(struct sctp_transport *); void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); +void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1868,6 +1872,8 @@ struct sctp_association { __u32 strreset_outseq; /* Update after receiving response */ __u32 strreset_inseq; /* Update after receiving request */ + struct sctp_chunk *strreset_chunk; /* save request chunk */ + struct sctp_priv_assoc_stats stats; int sent_cnt_removable; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 42ece6f35b98..fc33540d2f11 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -362,6 +362,9 @@ void sctp_association_free(struct sctp_association *asoc) /* Free stream information. */ sctp_stream_free(asoc->stream); + if (asoc->strreset_chunk) + sctp_chunk_free(asoc->strreset_chunk); + /* Clean up the bound address list. */ sctp_bind_addr_free(&asoc->base.bind_addr); @@ -520,6 +523,12 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, if (asoc->peer.last_data_from == peer) asoc->peer.last_data_from = transport; + if (asoc->strreset_chunk && + asoc->strreset_chunk->transport == peer) { + asoc->strreset_chunk->transport = transport; + sctp_transport_reset_reconf_timer(transport); + } + /* If we remove the transport an INIT was last sent to, set it to * NULL. 
Combined with the update of the retran path above, this * will cause the next INIT to be sent to the next available diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c345bf153bed..a4552712b882 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -436,6 +436,37 @@ out_unlock: sctp_association_put(asoc); } + /* Handle the timeout of the RE-CONFIG timer. */ +void sctp_generate_reconf_event(unsigned long data) +{ + struct sctp_transport *transport = (struct sctp_transport *)data; + struct sctp_association *asoc = transport->asoc; + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); + int error = 0; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy\n", __func__); + + /* Try again later. */ + if (!mod_timer(&transport->reconf_timer, jiffies + (HZ / 20))) + sctp_transport_hold(transport); + goto out_unlock; + } + + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, + SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), + asoc->state, asoc->ep, asoc, + transport, GFP_ATOMIC); + + if (error) + sk->sk_err = -error; + +out_unlock: + bh_unlock_sock(sk); + sctp_transport_put(transport); +} /* Inject a SACK Timeout event into the state machine. */ static void sctp_generate_sack_event(unsigned long data) @@ -453,6 +484,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { sctp_generate_t4_rto_event, sctp_generate_t5_shutdown_guard_event, NULL, + NULL, sctp_generate_sack_event, sctp_generate_autoclose_event, }; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0ceded37d20b..2ae186aba9a8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1021,6 +1021,34 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, return SCTP_DISPOSITION_CONSUME; } +/* resend asoc strreset_chunk. 
*/ +sctp_disposition_t sctp_sf_send_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_transport *transport = arg; + + if (asoc->overall_error_count >= asoc->max_retrans) { + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ETIMEDOUT)); + /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_NO_ERROR)); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + return SCTP_DISPOSITION_DELETE_TCB; + } + + sctp_chunk_hold(asoc->strreset_chunk); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(asoc->strreset_chunk)); + sctp_add_cmd_sf(commands, SCTP_CMD_STRIKE, SCTP_TRANSPORT(transport)); + + return SCTP_DISPOSITION_CONSUME; +} + /* * Process an heartbeat request. * diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index a987d54b379c..3da521abfc57 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -888,6 +888,25 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } +#define TYPE_SCTP_EVENT_TIMEOUT_RECONF { \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_send_reconf), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ +} + static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { 
TYPE_SCTP_EVENT_TIMEOUT_NONE, TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE, @@ -897,6 +916,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_T4_RTO, TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD, TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT, + TYPE_SCTP_EVENT_TIMEOUT_RECONF, TYPE_SCTP_EVENT_TIMEOUT_SACK, TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a1652ab63918..baa1ac00d7b5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -88,9 +88,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net, INIT_LIST_HEAD(&peer->transports); setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, - (unsigned long)peer); + (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, - (unsigned long)peer); + (unsigned long)peer); + setup_timer(&peer->reconf_timer, sctp_generate_reconf_event, + (unsigned long)peer); setup_timer(&peer->proto_unreach_timer, sctp_generate_proto_unreach_event, (unsigned long)peer); @@ -144,6 +146,9 @@ void sctp_transport_free(struct sctp_transport *transport) if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); + if (del_timer(&transport->reconf_timer)) + sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) sctp_association_put(transport->asoc); @@ -211,6 +216,14 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) sctp_transport_hold(transport); } +void sctp_transport_reset_reconf_timer(struct sctp_transport *transport) +{ + if (!timer_pending(&transport->reconf_timer)) + if (!mod_timer(&transport->reconf_timer, + jiffies + transport->rto)) + sctp_transport_hold(transport); +} + /* This transport has been assigned to an association. * Initialize fields from the association or from the sock itself. * Register the reference count in the association. 
-- cgit v1.2.3 From 7a090b04522b46a219c271d4cd2abbf572623e03 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:44 +0800 Subject: sctp: add stream reconf primitive This patch is to add a primitive based on sctp primitive frame for sending stream reconf request. It works as the other primitives, and create a SCTP_CMD_REPLY command to send the request chunk out. sctp_primitive_RECONF would be the api to send a reconf request chunk. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 3 ++- include/net/sctp/sctp.h | 2 ++ include/net/sctp/sm.h | 1 + net/sctp/primitive.c | 3 +++ net/sctp/sm_statefuns.c | 13 +++++++++++++ net/sctp/sm_statetable.c | 20 ++++++++++++++++++++ 6 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8307c862b5c2..3567c971cf3b 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -114,9 +114,10 @@ typedef enum { SCTP_PRIMITIVE_SEND, SCTP_PRIMITIVE_REQUESTHEARTBEAT, SCTP_PRIMITIVE_ASCONF, + SCTP_PRIMITIVE_RECONF, } sctp_event_primitive_t; -#define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_ASCONF +#define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF #define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1) /* We define here a utility type for manipulating subtypes. 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 598d938b0d0a..bc0e049b1474 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -141,6 +141,8 @@ int sctp_primitive_ABORT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_SEND(struct net *, struct sctp_association *, void *arg); int sctp_primitive_REQUESTHEARTBEAT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg); +int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, + void *arg); /* * sctp/input.c diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index d2d9e28fe783..430ed139fbbb 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -157,6 +157,7 @@ sctp_state_fn_t sctp_sf_error_shutdown; sctp_state_fn_t sctp_sf_ignore_primitive; sctp_state_fn_t sctp_sf_do_prm_requestheartbeat; sctp_state_fn_t sctp_sf_do_prm_asconf; +sctp_state_fn_t sctp_sf_do_prm_reconf; /* Prototypes for other event state functions. 
*/ sctp_state_fn_t sctp_sf_do_no_pending_tsn; diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index ab8d9f96a177..f0553a022859 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -211,3 +211,6 @@ DECLARE_PRIMITIVE(REQUESTHEARTBEAT); */ DECLARE_PRIMITIVE(ASCONF); + +/* RE-CONFIG 5.1 */ +DECLARE_PRIMITIVE(RECONF); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2ae186aba9a8..782e579472c9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5185,6 +5185,19 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +/* RE-CONFIG Section 5.1 RECONF Chunk Procedures */ +sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(chunk)); + return SCTP_DISPOSITION_CONSUME; +} + /* * Ignore the primitive event * diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 3da521abfc57..b5438b4f6c1e 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -643,6 +643,25 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ } /* TYPE_SCTP_PRIMITIVE_ASCONF */ +#define TYPE_SCTP_PRIMITIVE_RECONF { \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_reconf), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_reconf), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_reconf), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_reconf), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + 
TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ +} /* TYPE_SCTP_PRIMITIVE_RECONF */ + /* The primary index for this table is the primitive type. * The secondary index for this table is the state. */ @@ -653,6 +672,7 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE TYPE_SCTP_PRIMITIVE_SEND, TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT, TYPE_SCTP_PRIMITIVE_ASCONF, + TYPE_SCTP_PRIMITIVE_RECONF, }; #define TYPE_SCTP_OTHER_NO_PENDING_TSN { \ -- cgit v1.2.3 From c28445c3cb07ba1da2c1dc7b5f3066c686a6acc6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:45 +0800 Subject: sctp: add reconf_enable in asoc ep and netns This patch is to add reconf_enable field in all of asoc ep and netns to indicate if they support stream reset. When initializing, asoc reconf_enable get the default value from ep reconf_enable which is from netns netns reconf_enable by default. It is also to add reconf_capable in asoc peer part to know if peer supports reconf_enable, the value is set if ext params have reconf chunk support when processing init chunk, just as rfc6525 section 5.1.1 demands. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 3 +++ include/net/sctp/structs.h | 7 +++++-- net/sctp/associola.c | 1 + net/sctp/endpointola.c | 1 + net/sctp/protocol.c | 3 +++ net/sctp/sm_make_chunk.c | 15 +++++++++++++++ 6 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index c501d67172b1..b7871d018354 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -118,6 +118,9 @@ struct netns_sctp { /* Flag to indicate if PR-SCTP is enabled. */ int prsctp_enable; + /* Flag to indicate if PR-CONFIG is enabled. 
*/ + int reconf_enable; + /* Flag to idicate if SCTP-AUTH is enabled */ int auth_enable; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 463b4d642d68..ee037ef15d65 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1255,7 +1255,8 @@ struct sctp_endpoint { struct list_head endpoint_shared_keys; __u16 active_key_id; __u8 auth_enable:1, - prsctp_enable:1; + prsctp_enable:1, + reconf_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1508,6 +1509,7 @@ struct sctp_association { hostname_address:1, /* Peer understands DNS addresses? */ asconf_capable:1, /* Does peer support ADDIP? */ prsctp_capable:1, /* Can peer do PR-SCTP? */ + reconf_capable:1, /* Can peer do RE-CONFIG? */ auth_capable:1; /* Is peer doing SCTP-AUTH? */ /* sack_needed : This flag indicates if the next received @@ -1867,7 +1869,8 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ - prsctp_enable:1; + prsctp_enable:1, + reconf_enable:1; __u32 strreset_outseq; /* Update after receiving response */ __u32 strreset_inseq; /* Update after receiving request */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fc33540d2f11..68b99adc21a3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -270,6 +270,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; + asoc->reconf_enable = ep->reconf_enable; /* Save the hmacs and chunks list into this association */ if (ep->auth_hmacs_list) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 410ddc1e3443..8c589230794f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -164,6 +164,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->auth_hmacs_list = auth_hmacs; ep->auth_chunk_list = auth_chunks; ep->prsctp_enable = net->sctp.prsctp_enable; + 
ep->reconf_enable = net->sctp.reconf_enable; return ep; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f9c3c37c9ae0..8227bbbd077a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Enable PR-SCTP by default. */ net->sctp.prsctp_enable = 1; + /* Disable RECONF by default. */ + net->sctp.reconf_enable = 0; + /* Disable AUTH by default. */ net->sctp.auth_enable = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index df81fce08890..ad3445b3408e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -270,6 +270,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_ext += 2; } + if (asoc->reconf_enable) { + extensions[num_ext] = SCTP_CID_RECONF; + num_ext += 1; + } + if (sp->adaptation_ind) chunksize += sizeof(aiparam); @@ -434,6 +439,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, num_ext += 2; } + if (asoc->peer.reconf_capable) { + extensions[num_ext] = SCTP_CID_RECONF; + num_ext += 1; + } + if (sp->adaptation_ind) chunksize += sizeof(aiparam); @@ -2012,6 +2022,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc, for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { + case SCTP_CID_RECONF: + if (asoc->reconf_enable && + !asoc->peer.reconf_capable) + asoc->peer.reconf_capable = 1; + break; case SCTP_CID_FWD_TSN: if (asoc->prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; -- cgit v1.2.3 From 9fb657aec0e20b4ed4401c44a4140f8d7b7a9ca0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:46 +0800 Subject: sctp: add sockopt SCTP_ENABLE_STREAM_RESET This patch is to add sockopt SCTP_ENABLE_STREAM_RESET to get/set strreset_enable to indicate which reconf request type it supports, which is described in rfc6525 section 6.3.1. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 4 +++ include/uapi/linux/sctp.h | 7 ++++ net/sctp/associola.c | 1 + net/sctp/socket.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ee037ef15d65..d99b76e33b2e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1257,6 +1257,8 @@ struct sctp_endpoint { __u8 auth_enable:1, prsctp_enable:1, reconf_enable:1; + + __u8 strreset_enable; }; /* Recover the outter endpoint structure. */ @@ -1872,6 +1874,8 @@ struct sctp_association { prsctp_enable:1, reconf_enable:1; + __u8 strreset_enable; + __u32 strreset_outseq; /* Update after receiving response */ __u32 strreset_inseq; /* Update after receiving request */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index a406adcc0793..867be0f32fd7 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -115,6 +115,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_ENABLE_STREAM_RESET 118 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -138,6 +139,12 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX) #define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO) +/* For enable stream reset */ +#define SCTP_ENABLE_RESET_STREAM_REQ 0x01 +#define SCTP_ENABLE_RESET_ASSOC_REQ 0x02 +#define SCTP_ENABLE_CHANGE_ASSOC_REQ 0x04 +#define SCTP_ENABLE_STRRESET_MASK 0x07 + /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. 
*/ enum sctp_msg_flags { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68b99adc21a3..e50dc6d7543f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; asoc->reconf_enable = ep->reconf_enable; + asoc->strreset_enable = ep->strreset_enable; /* Save the hmacs and chunks list into this association */ if (ep->auth_hmacs_list) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 635e03412693..0a9bc984b6c8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3750,6 +3750,42 @@ out: return retval; } +static int sctp_setsockopt_enable_strreset(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_value & (~SCTP_ENABLE_STRRESET_MASK)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + asoc->strreset_enable = params.assoc_value; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + sp->ep->strreset_enable = params.assoc_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3916,6 +3952,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); break; + case SCTP_ENABLE_STREAM_RESET: + retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6400,6 +6439,47 @@ out: return retval; } +static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, + char __user *optval, + int
__user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->strreset_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->ep->strreset_enable; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &params, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6567,6 +6647,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_ENABLE_STREAM_RESET: + retval = sctp_getsockopt_enable_strreset(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From 7f9d68ac944e24ee5f9ac8d059ca00b1c1d34137 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:47 +0800 Subject: sctp: implement sender-side procedures for SSN Reset Request Parameter This patch is to implement sender-side procedures for the Outgoing and Incoming SSN Reset Request Parameter described in rfc6525 section 5.1.2 and 5.1.3. It is also add sockopt SCTP_RESET_STREAMS in rfc6525 section 6.3.2 for users. Note that the new asoc member strreset_outstanding is to make sure only one reconf request chunk on the fly as rfc6525 section 5.1.1 demands. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 6 ++++ include/net/sctp/structs.h | 1 + include/uapi/linux/sctp.h | 11 +++++++ net/sctp/outqueue.c | 33 +++++++++++++------ net/sctp/socket.c | 29 +++++++++++++++++ net/sctp/stream.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 149 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bc0e049b1474..3cfd365bcfbc 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -193,6 +193,12 @@ void sctp_remaddr_proc_exit(struct net *net); */ int sctp_offload_init(void); +/* + * sctp/stream.c + */ +int sctp_send_reset_streams(struct sctp_association *asoc, + struct sctp_reset_streams *params); + /* * Module global variables */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d99b76e33b2e..231fa9ac50bd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1875,6 +1875,7 @@ struct sctp_association { reconf_enable:1; __u8 strreset_enable; + __u8 strreset_outstanding; /* request param count on the fly */ __u32 strreset_outseq; /* Update after receiving response */ __u32 strreset_inseq; /* Update after receiving request */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 867be0f32fd7..03c27cefffb1 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -116,6 +116,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 #define SCTP_ENABLE_STREAM_RESET 118 +#define SCTP_RESET_STREAMS 119 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -145,6 +146,9 @@ typedef __s32 sctp_assoc_t; #define SCTP_ENABLE_CHANGE_ASSOC_REQ 0x04 #define SCTP_ENABLE_STRRESET_MASK 0x07 +#define SCTP_STREAM_RESET_INCOMING 0x01 +#define SCTP_STREAM_RESET_OUTGOING 0x02 + /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. 
*/ enum sctp_msg_flags { @@ -1015,4 +1019,11 @@ struct sctp_info { __u32 __reserved3; }; +struct sctp_reset_streams { + sctp_assoc_t srs_assoc_id; + uint16_t srs_flags; + uint16_t srs_number_streams; /* 0 == ALL */ + uint16_t srs_stream_list[]; /* list if srs_num_streams is not 0 */ +}; + #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 34efaa4ef2f6..65abe22d8691 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -915,22 +915,28 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) case SCTP_CID_ECN_ECNE: case SCTP_CID_ASCONF: case SCTP_CID_FWD_TSN: + case SCTP_CID_RECONF: status = sctp_packet_transmit_chunk(packet, chunk, one_packet, gfp); if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &q->control_chunk_list); - } else { - asoc->stats.octrlchunks++; - /* PR-SCTP C5) If a FORWARD TSN is sent, the - * sender MUST assure that at least one T3-rtx - * timer is running. - */ - if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { - sctp_transport_reset_t3_rtx(transport); - transport->last_time_sent = jiffies; - } + break; + } + + asoc->stats.octrlchunks++; + /* PR-SCTP C5) If a FORWARD TSN is sent, the + * sender MUST assure that at least one T3-rtx + * timer is running. + */ + if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + sctp_transport_reset_t3_rtx(transport); + transport->last_time_sent = jiffies; } + + if (chunk == asoc->strreset_chunk) + sctp_transport_reset_reconf_timer(transport); + break; default: @@ -1016,6 +1022,8 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { + __u32 sid = ntohs(chunk->subh.data_hdr->stream); + /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. 
*/ @@ -1038,6 +1046,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) continue; } + if (asoc->stream->out[sid].state == SCTP_STREAM_CLOSED) { + sctp_outq_head_data(q, chunk); + goto sctp_flush_out; + } + /* If there is a specified transport, use it. * Otherwise, we want to use the active path. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0a9bc984b6c8..bee4dd3feabb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3786,6 +3786,32 @@ out: return retval; } +static int sctp_setsockopt_reset_streams(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_reset_streams *params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen < sizeof(struct sctp_reset_streams)) + return -EINVAL; + + params = memdup_user(optval, optlen); + if (IS_ERR(params)) + return PTR_ERR(params); + + asoc = sctp_id2assoc(sk, params->srs_assoc_id); + if (!asoc) + goto out; + + retval = sctp_send_reset_streams(asoc, params); + +out: + kfree(params); + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3955,6 +3981,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); break; + case SCTP_RESET_STREAMS: + retval = sctp_setsockopt_reset_streams(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f86de43cbbe5..13d5e07dcd7d 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -33,6 +33,7 @@ */ #include +#include struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) { @@ -83,3 +84,81 @@ void sctp_stream_clear(struct sctp_stream *stream) for (i = 0; i < stream->incnt; i++) stream->in[i].ssn = 0; } + +static int sctp_send_reconf(struct sctp_association *asoc, + struct sctp_chunk *chunk) +{ + struct net *net = 
sock_net(asoc->base.sk); + int retval = 0; + + retval = sctp_primitive_RECONF(net, asoc, chunk); + if (retval) + sctp_chunk_free(chunk); + + return retval; +} + +int sctp_send_reset_streams(struct sctp_association *asoc, + struct sctp_reset_streams *params) +{ + struct sctp_stream *stream = asoc->stream; + __u16 i, str_nums, *str_list; + struct sctp_chunk *chunk; + int retval = -EINVAL; + bool out, in; + + if (!asoc->peer.reconf_capable || + !(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) { + retval = -ENOPROTOOPT; + goto out; + } + + if (asoc->strreset_outstanding) { + retval = -EINPROGRESS; + goto out; + } + + out = params->srs_flags & SCTP_STREAM_RESET_OUTGOING; + in = params->srs_flags & SCTP_STREAM_RESET_INCOMING; + if (!out && !in) + goto out; + + str_nums = params->srs_number_streams; + str_list = params->srs_stream_list; + if (out && str_nums) + for (i = 0; i < str_nums; i++) + if (str_list[i] >= stream->outcnt) + goto out; + + if (in && str_nums) + for (i = 0; i < str_nums; i++) + if (str_list[i] >= stream->incnt) + goto out; + + chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); + if (!chunk) + goto out; + + if (out) { + if (str_nums) + for (i = 0; i < str_nums; i++) + stream->out[str_list[i]].state = + SCTP_STREAM_CLOSED; + else + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_CLOSED; + } + + asoc->strreset_outstanding = out + in; + asoc->strreset_chunk = chunk; + sctp_chunk_hold(asoc->strreset_chunk); + + retval = sctp_send_reconf(asoc, chunk); + if (retval) { + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + +out: + return retval; +} -- cgit v1.2.3 From b22de490869da354116ea4cbbaa09dcbc260b2b4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 17 Jan 2017 20:41:38 -0500 Subject: net: dsa: store CPU switch structure in the tree Store a dsa_switch pointer to the CPU switch in the tree instead of only its index. This avoids the need to initialize it to -1. 
Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 8 ++++---- net/dsa/dsa.c | 7 +++---- net/dsa/dsa2.c | 5 ++--- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index b94d1f2ef912..c72ed7af2a2a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -124,7 +124,7 @@ struct dsa_switch_tree { /* * The switch and port to which the CPU is attached. */ - s8 cpu_switch; + struct dsa_switch *cpu_switch; s8 cpu_port; /* @@ -204,7 +204,7 @@ struct dsa_switch { static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); + return !!(ds == ds->dst->cpu_switch && p == ds->dst->cpu_port); } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) @@ -227,10 +227,10 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) * Else return the (DSA) port number that connects to the * switch that is one hop closer to the cpu. */ - if (dst->cpu_switch == ds->index) + if (dst->cpu_switch == ds) return dst->cpu_port; else - return ds->rtable[dst->cpu_switch]; + return ds->rtable[dst->cpu_switch->index]; } struct switchdev_trans; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index fd532487dfdf..b220609cfe6f 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -225,12 +225,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) continue; if (!strcmp(name, "cpu")) { - if (dst->cpu_switch != -1) { + if (!dst->cpu_switch) { netdev_err(dst->master_netdev, "multiple cpu ports?!\n"); return -EINVAL; } - dst->cpu_switch = index; + dst->cpu_switch = ds; dst->cpu_port = i; ds->cpu_port_mask |= 1 << i; } else if (!strcmp(name, "dsa")) { @@ -254,7 +254,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * tagging protocol to the preferred tagging format of this * switch. 
*/ - if (dst->cpu_switch == index) { + if (dst->cpu_switch == ds) { enum dsa_tag_protocol tag_protocol; tag_protocol = ops->get_tag_protocol(ds); @@ -757,7 +757,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, dst->pd = pd; dst->master_netdev = dev; - dst->cpu_switch = -1; dst->cpu_port = -1; for (i = 0; i < pd->nr_chips; i++) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 42a41d84053c..020e072b4299 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -57,7 +57,6 @@ static struct dsa_switch_tree *dsa_add_dst(u32 tree) if (!dst) return NULL; dst->tree = tree; - dst->cpu_switch = -1; INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_switch_trees, &dst->list); kref_init(&dst->refcount); @@ -448,8 +447,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, if (!dst->master_netdev) dst->master_netdev = ethernet_dev; - if (dst->cpu_switch == -1) { - dst->cpu_switch = ds->index; + if (!dst->cpu_switch) { + dst->cpu_switch = ds; dst->cpu_port = index; } -- cgit v1.2.3 From c2a2efbbfcb31bedcf81170fc1aa920255c33b8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2017 05:06:08 -0800 Subject: net: remove bh disabling around percpu_counter accesses Shaohua Li made percpu_counter irq safe in commit 098faf5805c8 ("percpu_counter: make APIs irq safe") We can safely remove BH disable/enable sections around various percpu_counter manipulations. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst_ops.h | 9 +-------- include/net/inet_frag.h | 8 +------- net/ipv4/inet_connection_sock.c | 3 +-- net/ipv4/proc.c | 2 -- net/ipv4/tcp.c | 2 -- net/ipv4/tcp_ipv4.c | 2 -- 6 files changed, 3 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index a0d443ca16fc..8a2b66d8d78d 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -46,19 +46,12 @@ static inline int dst_entries_get_fast(struct dst_ops *dst) static inline int dst_entries_get_slow(struct dst_ops *dst) { - int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&dst->pcpuc_entries); - local_bh_enable(); - return res; + return percpu_counter_sum_positive(&dst->pcpuc_entries); } static inline void dst_entries_add(struct dst_ops *dst, int val) { - local_bh_disable(); percpu_counter_add(&dst->pcpuc_entries, val); - local_bh_enable(); } static inline int dst_entries_init(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 909972aa3acd..5894730ec82a 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -164,13 +164,7 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i) static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) { - unsigned int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&nf->mem); - local_bh_enable(); - - return res; + return percpu_counter_sum_positive(&nf->mem); } /* RFC 3168 support : diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 096a085611ab..c7f7c5335369 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -836,9 +836,8 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - local_bh_disable(); percpu_counter_dec(sk->sk_prot->orphan_count); - local_bh_enable(); + sock_put(sk); } EXPORT_SYMBOL(inet_csk_destroy_sock); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 
0247ca032232..a9deeb90dd36 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -57,10 +57,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) unsigned int frag_mem; int orphans, sockets; - local_bh_disable(); orphans = percpu_counter_sum_positive(&tcp_orphan_count); sockets = proto_sockets_allocated_sum_positive(&tcp_prot); - local_bh_enable(); socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index aba6ea76338e..c43eb1a831d7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -420,9 +420,7 @@ void tcp_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - local_bh_disable(); sk_sockets_allocated_inc(sk); - local_bh_enable(); } EXPORT_SYMBOL(tcp_init_sock); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3644fc117691..f7325b25b06e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1887,9 +1887,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_free_fastopen_req(tp); tcp_saved_syn_free(tp); - local_bh_disable(); sk_sockets_allocated_dec(sk); - local_bh_enable(); } EXPORT_SYMBOL(tcp_v4_destroy_sock); -- cgit v1.2.3 From 6c59ebd356ff2ca64cdf1f61c5fe17f6fa8fc045 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 20 Jan 2017 22:27:04 +0800 Subject: sock: use hlist_entry_safe Use hlist_entry_safe() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- include/net/sock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 389a0a619b45..7144750d14e5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -544,8 +544,7 @@ static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) static inline struct sock *sk_next(const struct sock *sk) { - return sk->sk_node.next ? 
- hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; + return hlist_entry_safe(sk->sk_node.next, struct sock, sk_node); } static inline struct sock *sk_nulls_next(const struct sock *sk) -- cgit v1.2.3 From 22fbece133b71895ca6bb66890b2d9b1ddaa908c Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 18 Jan 2017 15:14:56 -0500 Subject: csum: eliminate sparse warning in remcsum_unadjust() Cast second parameter of csum_sub() from __sum16 to __wsum. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- include/net/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/checksum.h b/include/net/checksum.h index 35d0fabd2782..aef2b2bb6603 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -179,7 +179,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { - *psum = csum_fold(csum_sub(delta, *psum)); + *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } #endif -- cgit v1.2.3 From cf1a56a4cf196a2922e66e9a8e0bf80d324c5548 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 20 Jan 2017 01:37:50 +0100 Subject: net: dsa: Remove hwmon support Only the Marvell mv88e6xxx DSA driver made use of the HWMON support in DSA. The temperature sensor registers are actually in the embedded PHYs, and the PHY driver now supports it. So remove all HWMON support from DSA and drivers. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 154 ---------------------------------- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 16 ---- include/net/dsa.h | 8 -- net/dsa/Kconfig | 11 --- net/dsa/Makefile | 1 - net/dsa/dsa.c | 4 - net/dsa/dsa_priv.h | 9 -- net/dsa/hwmon.c | 147 -------------------------------- 8 files changed, 350 deletions(-) delete mode 100644 net/dsa/hwmon.c (limited to 'include/net') diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 987b2dbbd35a..c7e08e13bb54 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2964,154 +2964,6 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip) of_node_put(chip->mdio_np); } -#ifdef CONFIG_NET_DSA_HWMON - -static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) -{ - struct mv88e6xxx_chip *chip = ds->priv; - u16 val; - int ret; - - *temp = 0; - - mutex_lock(&chip->reg_lock); - - ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6); - if (ret < 0) - goto error; - - /* Enable temperature sensor */ - ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); - if (ret < 0) - goto error; - - ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5)); - if (ret < 0) - goto error; - - /* Wait for temperature to stabilize */ - usleep_range(10000, 12000); - - ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); - if (ret < 0) - goto error; - - /* Disable temperature sensor */ - ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5)); - if (ret < 0) - goto error; - - *temp = ((val & 0x1f) - 5) * 5; - -error: - mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0); - mutex_unlock(&chip->reg_lock); - return ret; -} - -static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int phy = mv88e6xxx_6320_family(chip) ? 
3 : 0; - u16 val; - int ret; - - *temp = 0; - - mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val); - mutex_unlock(&chip->reg_lock); - if (ret < 0) - return ret; - - *temp = (val & 0xff) - 25; - - return 0; -} - -static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) -{ - struct mv88e6xxx_chip *chip = ds->priv; - - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP)) - return -EOPNOTSUPP; - - if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip)) - return mv88e63xx_get_temp(ds, temp); - - return mv88e61xx_get_temp(ds, temp); -} - -static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; - u16 val; - int ret; - - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) - return -EOPNOTSUPP; - - *temp = 0; - - mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); - mutex_unlock(&chip->reg_lock); - if (ret < 0) - return ret; - - *temp = (((val >> 8) & 0x1f) * 5) - 25; - - return 0; -} - -static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; - u16 val; - int err; - - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) - return -EOPNOTSUPP; - - mutex_lock(&chip->reg_lock); - err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); - if (err) - goto unlock; - temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f); - err = mv88e6xxx_phy_page_write(chip, phy, 6, 26, - (val & 0xe0ff) | (temp << 8)); -unlock: - mutex_unlock(&chip->reg_lock); - - return err; -} - -static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int phy = mv88e6xxx_6320_family(chip) ? 
3 : 0; - u16 val; - int ret; - - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) - return -EOPNOTSUPP; - - *alarm = false; - - mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); - mutex_unlock(&chip->reg_lock); - if (ret < 0) - return ret; - - *alarm = !!(val & 0x40); - - return 0; -} -#endif /* CONFIG_NET_DSA_HWMON */ - static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -4386,12 +4238,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .get_sset_count = mv88e6xxx_get_sset_count, .set_eee = mv88e6xxx_set_eee, .get_eee = mv88e6xxx_get_eee, -#ifdef CONFIG_NET_DSA_HWMON - .get_temp = mv88e6xxx_get_temp, - .get_temp_limit = mv88e6xxx_get_temp_limit, - .set_temp_limit = mv88e6xxx_set_temp_limit, - .get_temp_alarm = mv88e6xxx_get_temp_alarm, -#endif .get_eeprom_len = mv88e6xxx_get_eeprom_len, .get_eeprom = mv88e6xxx_get_eeprom, .set_eeprom = mv88e6xxx_set_eeprom, diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 466cfdadb7bd..ce8b43b14e96 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -497,12 +497,6 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_STU, - /* Internal temperature sensor. - * Available from any enabled port's PHY register 26, page 6. - */ - MV88E6XXX_CAP_TEMP, - MV88E6XXX_CAP_TEMP_LIMIT, - /* VLAN Table Unit. * The VTU is used to program 802.1Q VLANs. See GLOBAL_VTU_OP. 
*/ @@ -533,8 +527,6 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) #define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* Ingress Rate Limit unit */ @@ -586,7 +578,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ @@ -605,8 +596,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_TEMP | \ - MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ @@ -621,7 +610,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ @@ -637,8 +625,6 @@ enum mv88e6xxx_cap { MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_TEMP | \ - MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ @@ -651,8 +637,6 @@ struct mv88e6xxx_ops; (MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_TEMP | \ - MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ MV88E6XXX_FLAGS_MULTI_CHIP | \ diff --git a/include/net/dsa.h b/include/net/dsa.h index c72ed7af2a2a..9d6cd923c48c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -307,14 +307,6 @@ struct dsa_switch_ops { int (*get_eee)(struct dsa_switch *ds, int port, struct ethtool_eee *e); -#ifdef CONFIG_NET_DSA_HWMON - /* Hardware monitoring */ - int (*get_temp)(struct dsa_switch *ds, int *temp); - int (*get_temp_limit)(struct 
dsa_switch *ds, int *temp); - int (*set_temp_limit)(struct dsa_switch *ds, int temp); - int (*get_temp_alarm)(struct dsa_switch *ds, bool *alarm); -#endif - /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); int (*get_eeprom)(struct dsa_switch *ds, diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 39bb5b3a82f2..9649238eef40 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -15,17 +15,6 @@ config NET_DSA if NET_DSA -config NET_DSA_HWMON - bool "Distributed Switch Architecture HWMON support" - default y - depends on HWMON && !(NET_DSA=y && HWMON=m) - ---help--- - Say Y if you want to expose thermal sensor data on switches supported - by the Distributed Switch Architecture. - - Some of those switches contain thermal sensors. This data is available - via the hwmon sysfs interface and exposes the onboard sensors. - # tagging formats config NET_DSA_TAG_BRCM bool diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 560b6747c276..a3380ed0e0be 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,7 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o slave.o dsa2.o -dsa_core-$(CONFIG_NET_DSA_HWMON) += hwmon.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 91f96e1bd2ec..77cb78767f1d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -316,8 +316,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret) return ret; - dsa_hwmon_register(ds); - return 0; } @@ -376,8 +374,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds) { int port; - dsa_hwmon_unregister(ds); - /* Destroy network devices for physical switch ports. 
*/ for (port = 0; port < DSA_MAX_PORTS; port++) { if (!(ds->enabled_port_mask & (1 << port))) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7e3385ec73f4..63ae1484abae 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -56,15 +56,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds); void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds); -/* hwmon.c */ -#ifdef CONFIG_NET_DSA_HWMON -void dsa_hwmon_register(struct dsa_switch *ds); -void dsa_hwmon_unregister(struct dsa_switch *ds); -#else -static inline void dsa_hwmon_register(struct dsa_switch *ds) { } -static inline void dsa_hwmon_unregister(struct dsa_switch *ds) { } -#endif - /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/hwmon.c b/net/dsa/hwmon.c deleted file mode 100644 index 08831a811278..000000000000 --- a/net/dsa/hwmon.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * net/dsa/hwmon.c - HWMON subsystem support - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include - -#include "dsa_priv.h" - -static ssize_t temp1_input_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dsa_switch *ds = dev_get_drvdata(dev); - int temp, ret; - - ret = ds->ops->get_temp(ds, &temp); - if (ret < 0) - return ret; - - return sprintf(buf, "%d\n", temp * 1000); -} -static DEVICE_ATTR_RO(temp1_input); - -static ssize_t temp1_max_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dsa_switch *ds = dev_get_drvdata(dev); - int temp, ret; - - ret = ds->ops->get_temp_limit(ds, &temp); - if (ret < 0) - return ret; - - return sprintf(buf, "%d\n", temp * 1000); -} - -static ssize_t temp1_max_store(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - struct dsa_switch *ds = dev_get_drvdata(dev); - int temp, ret; - - ret = kstrtoint(buf, 0, &temp); - if (ret < 0) - return ret; - - ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); - if (ret < 0) - return ret; - - return count; -} -static DEVICE_ATTR_RW(temp1_max); - -static ssize_t temp1_max_alarm_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dsa_switch *ds = dev_get_drvdata(dev); - bool alarm; - int ret; - - ret = ds->ops->get_temp_alarm(ds, &alarm); - if (ret < 0) - return ret; - - return sprintf(buf, "%d\n", alarm); -} -static DEVICE_ATTR_RO(temp1_max_alarm); - -static struct attribute *dsa_hwmon_attrs[] = { - &dev_attr_temp1_input.attr, /* 0 */ - &dev_attr_temp1_max.attr, /* 1 */ - &dev_attr_temp1_max_alarm.attr, /* 2 */ - NULL -}; - -static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, - struct attribute *attr, int index) -{ - struct device *dev = container_of(kobj, struct device, kobj); - struct dsa_switch *ds = dev_get_drvdata(dev); - const struct dsa_switch_ops *ops = ds->ops; - umode_t mode = attr->mode; - - if (index == 1) { - if (!ops->get_temp_limit) - mode = 0; - else if (!ops->set_temp_limit) - mode &= 
~S_IWUSR; - } else if (index == 2 && !ops->get_temp_alarm) { - mode = 0; - } - return mode; -} - -static const struct attribute_group dsa_hwmon_group = { - .attrs = dsa_hwmon_attrs, - .is_visible = dsa_hwmon_attrs_visible, -}; -__ATTRIBUTE_GROUPS(dsa_hwmon); - -void dsa_hwmon_register(struct dsa_switch *ds) -{ - const char *netname = netdev_name(ds->dst->master_netdev); - char hname[IFNAMSIZ + 1]; - int i, j; - - /* If the switch provides temperature accessors, register with hardware - * monitoring subsystem. Treat registration error as non-fatal. - */ - if (!ds->ops->get_temp) - return; - - /* Create valid hwmon 'name' attribute */ - for (i = j = 0; i < IFNAMSIZ && netname[i]; i++) { - if (isalnum(netname[i])) - hname[j++] = netname[i]; - } - hname[j] = '\0'; - scnprintf(ds->hwmon_name, sizeof(ds->hwmon_name), "%s_dsa%d", hname, - ds->index); - ds->hwmon_dev = hwmon_device_register_with_groups(NULL, ds->hwmon_name, - ds, dsa_hwmon_groups); - if (IS_ERR(ds->hwmon_dev)) { - pr_warn("DSA: failed to register HWMON subsystem for switch %d\n", - ds->index); - ds->hwmon_dev = NULL; - } else { - pr_info("DSA: registered HWMON subsystem for switch %d\n", - ds->index); - } -} - -void dsa_hwmon_unregister(struct dsa_switch *ds) -{ - if (ds->hwmon_dev) { - hwmon_device_unregister(ds->hwmon_dev); - ds->hwmon_dev = NULL; - } -} -- cgit v1.2.3 From c6c94aea821640ac422c435f9d4c895af76ed6f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Jan 2017 10:02:13 +0100 Subject: cfg80211: fix a documentation warning The new restructured text parser complains about the formatting, and really this should be a definition list. In order to fix this without introducing trailing whitespace, convert to the inline kernel-doc format. 
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 63 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b7aba6e1a586..870549480e9b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3178,22 +3178,6 @@ struct ieee80211_iface_limit { /** * struct ieee80211_iface_combination - possible interface combination - * @limits: limits for the given interface types - * @n_limits: number of limitations - * @num_different_channels: can use up to this many different channels - * @max_interfaces: maximum number of interfaces in total allowed in this - * group - * @beacon_int_infra_match: In this combination, the beacon intervals - * between infrastructure and AP types must match. This is required - * only in special cases. - * @radar_detect_widths: bitmap of channel widths supported for radar detection - * @radar_detect_regions: bitmap of regions supported for radar detection - * @beacon_int_min_gcd: This interface combination supports different - * beacon intervals. - * = 0 - all beacon intervals for different interface must be same. - * > 0 - any beacon interval for the interface part of this combination AND - * *GCD* of all beacon intervals from beaconing interfaces of this - * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. 
@@ -3252,13 +3236,60 @@ struct ieee80211_iface_limit { * */ struct ieee80211_iface_combination { + /** + * @limits: + * limits for the given interface types + */ const struct ieee80211_iface_limit *limits; + + /** + * @num_different_channels: + * can use up to this many different channels + */ u32 num_different_channels; + + /** + * @max_interfaces: + * maximum number of interfaces in total allowed in this group + */ u16 max_interfaces; + + /** + * @n_limits: + * number of limitations + */ u8 n_limits; + + /** + * @beacon_int_infra_match: + * In this combination, the beacon intervals between infrastructure + * and AP types must match. This is required only in special cases. + */ bool beacon_int_infra_match; + + /** + * @radar_detect_widths: + * bitmap of channel widths supported for radar detection + */ u8 radar_detect_widths; + + /** + * @radar_detect_regions: + * bitmap of regions supported for radar detection + */ u8 radar_detect_regions; + + /** + * @beacon_int_min_gcd: + * This interface combination supports different beacon intervals. + * + * = 0 + * all beacon intervals for different interface must be same. + * > 0 + * any beacon interval for the interface part of this combination AND + * GCD of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. + */ u32 beacon_int_min_gcd; }; -- cgit v1.2.3 From 57eeb2086d6477968990e1790a9d8d0ec7ee8a4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Jan 2017 11:12:01 +0100 Subject: mac80211: fix documentation warnings Fix a few restructured text warnings in mac80211, making the documentation warning-free (for now). In order to not add trailing whitespace, but also not introduce too much noise into this change, move just the affected docs into inline comments.
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 74 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 86967b85dfd0..33624ffbd5a5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1768,15 +1768,6 @@ struct ieee80211_sta_rates { * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. - * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes. This - * field is always valid for packets with a VHT preamble. For packets - * with a HT preamble, additional limits apply: - * + If the skb is transmitted as part of a BA agreement, the - * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. - * + If the skb is not part of a BA aggreement, the A-MSDU maximal - * size is min(max_amsdu_len, 7935) bytes. - * Both additional HT limits must be enforced by the low level driver. - * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) @@ -1799,6 +1790,22 @@ struct ieee80211_sta { bool tdls_initiator; bool mfp; u8 max_amsdu_subframes; + + /** + * @max_amsdu_len: + * indicates the maximal length of an A-MSDU in bytes. + * This field is always valid for packets with a VHT preamble. + * For packets with a HT preamble, additional limits apply: + * + * * If the skb is transmitted as part of a BA agreement, the + * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. + * * If the skb is not part of a BA aggreement, the A-MSDU maximal + * size is min(max_amsdu_len, 7935) bytes. + * + * Both additional HT limits must be enforced by the low level + * driver. 
This is defined by the spec (IEEE 802.11-2012 section + * 8.3.2.2 NOTE 2). + */ u16 max_amsdu_len; bool support_p2p_ps; u16 max_rc_amsdu_len; @@ -3203,26 +3210,6 @@ enum ieee80211_reconfig_type { * Returns non-zero if this device sent the last beacon. * The callback can sleep. * - * @ampdu_action: Perform a certain A-MPDU action - * The RA/TID combination determines the destination and TID we want - * the ampdu action to be performed for. The action is defined through - * ieee80211_ampdu_mlme_action. - * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver - * may neither send aggregates containing more subframes than @buf_size - * nor send aggregates in a way that lost frames would exceed the - * buffer size. If just limiting the aggregate size, this would be - * possible with a buf_size of 8: - * - TX: 1.....7 - * - RX: 2....7 (lost frame #1) - * - TX: 8..1... - * which is invalid since #1 was now re-transmitted well past the - * buffer size of 8. Correct ways to retransmit #1 would be: - * - TX: 1 or 18 or 81 - * Even "189" would be wrong since 1 could be lost again. - * - * Returns a negative error code on failure. - * The callback can sleep. - * * @get_survey: Return per-channel survey information * * @rfkill_poll: Poll rfkill hardware state. If you need this, you also @@ -3575,6 +3562,35 @@ struct ieee80211_ops { s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); + + /** + * @ampdu_action: + * Perform a certain A-MPDU action. + * The RA/TID combination determines the destination and TID we want + * the ampdu action to be performed for. The action is defined through + * ieee80211_ampdu_mlme_action. + * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver + * may neither send aggregates containing more subframes than @buf_size + * nor send aggregates in a way that lost frames would exceed the + * buffer size. 
If just limiting the aggregate size, this would be + * possible with a buf_size of 8: + * + * - ``TX: 1.....7`` + * - ``RX: 2....7`` (lost frame #1) + * - ``TX: 8..1...`` + * + * which is invalid since #1 was now re-transmitted well past the + * buffer size of 8. Correct ways to retransmit #1 would be: + * + * - ``TX: 1 or`` + * - ``TX: 18 or`` + * - ``TX: 81`` + * + * Even ``189`` would be wrong since 1 could be lost again. + * + * Returns a negative error code on failure. + * The callback can sleep. + */ int (*ampdu_action)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params); -- cgit v1.2.3 From 4548b683b78137f8eadeb312b94e20bb0d4a7141 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Fri, 20 Jan 2017 17:49:11 -0800 Subject: Introduce a sysctl that modifies the value of PROT_SOCK. Add net.ipv4.ip_unprivileged_port_start, which is a per namespace sysctl that denotes the first unprivileged inet port in the namespace. To disable all privileged ports set this to zero. It also checks for overlap with the local port range. The privileged and local range may not overlap. The use case for this change is to allow containerized processes to bind to privileged ports, but prevent them from ever being allowed to modify their container's network configuration. The latter is accomplished by ensuring that the network namespace is not a child of the user namespace. This modification was needed to allow the container manager to disable a namespace's privileged port restrictions without exposing control of the network namespace to processes in the user namespace. Signed-off-by: Krister Johansen Signed-off-by: David S.
Miller --- Documentation/networking/ip-sysctl.txt | 9 ++++++ include/net/ip.h | 10 +++++++ include/net/netns/ipv4.h | 1 + net/ipv4/af_inet.c | 5 +++- net/ipv4/sysctl_net_ipv4.c | 50 +++++++++++++++++++++++++++++++++- net/ipv6/af_inet6.c | 3 +- net/netfilter/ipvs/ip_vs_ctl.c | 7 ++--- net/sctp/socket.c | 10 ++++--- security/selinux/hooks.c | 3 +- 9 files changed, 86 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index aa1bb49f1dc6..17f2e7791042 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -822,6 +822,15 @@ ip_local_reserved_ports - list of comma separated ranges Default: Empty +ip_unprivileged_port_start - INTEGER + This is a per-namespace sysctl. It defines the first + unprivileged port in the network namespace. Privileged ports + require root or CAP_NET_BIND_SERVICE in order to bind to them. + To disable all privileged ports, set this to 0. It may not + overlap with the ip_local_port_range. + + Default: 1024 + ip_nonlocal_bind - BOOLEAN If set, allows processes to bind() to non-local IP addresses, which can be quite useful - but may break some applications.
diff --git a/include/net/ip.h b/include/net/ip.h index ab6761a7c883..bf264a8db1ce 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -263,11 +263,21 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } +static inline int inet_prot_sock(struct net *net) +{ + return net->ipv4.sysctl_ip_prot_sock; +} + #else static inline int inet_is_local_reserved_port(struct net *net, int port) { return 0; } + +static inline int inet_prot_sock(struct net *net) +{ + return PROT_SOCK; +} #endif __be32 inet_current_timestamp(void); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e3f5b6f26d5..e365732b8051 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { #ifdef CONFIG_SYSCTL unsigned long *sysctl_local_reserved_ports; + int sysctl_ip_prot_sock; #endif #ifdef CONFIG_IP_MROUTE diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index aae410bb655a..28fe8da4e1ac 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -479,7 +479,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) snum = ntohs(addr->sin_port); err = -EACCES; - if (snum && snum < PROT_SOCK && + if (snum && snum < inet_prot_sock(net) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; @@ -1700,6 +1700,9 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; +#ifdef CONFIG_SYSCTL + net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; +#endif return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c8d283615c6f..1b861997fdc5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -35,6 +35,8 @@ static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max 
= 31; +static int ip_privileged_port_min; +static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; static int ip_ttl_max = 255; static int tcp_syn_retries_min = 1; @@ -79,7 +81,12 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { - if (range[1] < range[0]) + /* Ensure that the upper limit is not smaller than the lower, + * and that the lower does not encroach upon the privileged + * port limit. + */ + if ((range[1] < range[0]) || + (range[0] < net->ipv4.sysctl_ip_prot_sock)) ret = -EINVAL; else set_local_port_range(net, range); @@ -88,6 +95,40 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, return ret; } +/* Validate changes from /proc interface. */ +static int ipv4_privileged_ports(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_ip_prot_sock); + int ret; + int pports; + int range[2]; + struct ctl_table tmp = { + .data = &pports, + .maxlen = sizeof(pports), + .mode = table->mode, + .extra1 = &ip_privileged_port_min, + .extra2 = &ip_privileged_port_max, + }; + + pports = net->ipv4.sysctl_ip_prot_sock; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) { + inet_get_local_port_range(net, &range[0], &range[1]); + /* Ensure that the local port range doesn't overlap with the + * privileged port range. 
+ */ + if (range[0] < pports) + ret = -EINVAL; + else + net->ipv4.sysctl_ip_prot_sock = pports; + } + + return ret; +} static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { @@ -964,6 +1005,13 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &one, }, #endif + { + .procname = "ip_unprivileged_port_start", + .maxlen = sizeof(int), + .data = &init_net.ipv4.sysctl_ip_prot_sock, + .mode = 0644, + .proc_handler = ipv4_privileged_ports, + }, { } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index aa42123bc301..04db40620ea6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -302,7 +302,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; snum = ntohs(addr->sin6_port); - if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) + if (snum && snum < inet_prot_sock(net) && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; lock_sock(sk); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 55e0169caa4c..8b7416f4e01a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -426,10 +426,9 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol */ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); - if (svc == NULL - && protocol == IPPROTO_TCP - && atomic_read(&ipvs->ftpsvc_counter) - && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { + if (!svc && protocol == IPPROTO_TCP && + atomic_read(&ipvs->ftpsvc_counter) && + (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) { /* * Check if ftp service entry exists, the packet * might belong to FTP data connections. 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bee4dd3feabb..d699d2cbf275 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -360,7 +360,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) } } - if (snum && snum < PROT_SOCK && + if (snum && snum < inet_prot_sock(net) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; @@ -1152,8 +1152,10 @@ static int __sctp_connect(struct sock *sk, * accept new associations, but it SHOULD NOT * be permitted to open new associations. */ - if (ep->base.bind_addr.port < PROT_SOCK && - !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) { + if (ep->base.bind_addr.port < + inet_prot_sock(net) && + !ns_capable(net->user_ns, + CAP_NET_BIND_SERVICE)) { err = -EACCES; goto out_free; } @@ -1818,7 +1820,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * but it SHOULD NOT be permitted to open new * associations. */ - if (ep->base.bind_addr.port < PROT_SOCK && + if (ep->base.bind_addr.port < inet_prot_sock(net) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) { err = -EACCES; goto out_unlock; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c7c6619431d5..53cb6da5f1c6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4365,7 +4365,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in inet_get_local_port_range(sock_net(sk), &low, &high); - if (snum < max(PROT_SOCK, low) || snum > high) { + if (snum < max(inet_prot_sock(sock_net(sk)), low) || + snum > high) { err = sel_netport_sid(sk->sk_protocol, snum, &sid); if (err) -- cgit v1.2.3 From 6ae0a6286171154661b74f7f550f9441c6008424 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 23 Jan 2017 11:07:08 +0100 Subject: net: Introduce psample, a new genetlink channel for packet sampling Add a general way for kernel modules to sample packets, without being tied to any specific subsystem. 
This netlink channel can be used by tc, iptables, etc. and allows standardizing packet sampling in the kernel. For every sampled packet, the psample module adds the following metadata fields: PSAMPLE_ATTR_IIFINDEX - the packet's input ifindex, if applicable PSAMPLE_ATTR_OIFINDEX - the packet's output ifindex, if applicable PSAMPLE_ATTR_ORIGSIZE - the packet's original size, in case it has been truncated during sampling PSAMPLE_ATTR_SAMPLE_GROUP - the packet's sample group, which is set by the user who initiated the sampling. This field allows the user to differentiate between several samplers working simultaneously and filter packets relevant to them PSAMPLE_ATTR_GROUP_SEQ - sequence counter of last sent packet. The sequence is kept for each group PSAMPLE_ATTR_SAMPLE_RATE - the sampling rate used for sampling the packets PSAMPLE_ATTR_DATA - the actual packet bits The sampled packets are sent to the PSAMPLE_NL_MCGRP_SAMPLE multicast group. In addition, add the GET_GROUPS netlink command which allows the user to see the current sample groups, their refcount and sequence number. This command currently supports only netlink dump mode. Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Reviewed-by: Jamal Hadi Salim Reviewed-by: Simon Horman Signed-off-by: David S.
Miller --- MAINTAINERS | 7 + include/net/psample.h | 36 ++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/psample.h | 35 +++++ net/Kconfig | 1 + net/Makefile | 1 + net/psample/Kconfig | 15 +++ net/psample/Makefile | 5 + net/psample/psample.c | 301 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 402 insertions(+) create mode 100644 include/net/psample.h create mode 100644 include/uapi/linux/psample.h create mode 100644 net/psample/Kconfig create mode 100644 net/psample/Makefile create mode 100644 net/psample/psample.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index 3c84a8fecc09..d76fccd09266 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9957,6 +9957,13 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/block/ps3vram.c +PSAMPLE PACKET SAMPLING SUPPORT: +M: Yotam Gigi +S: Maintained +F: net/psample +F: include/net/psample.h +F: include/uapi/linux/psample.h + PSTORE FILESYSTEM M: Anton Vorontsov M: Colin Cross diff --git a/include/net/psample.h b/include/net/psample.h new file mode 100644 index 000000000000..8888b0e1a82e --- /dev/null +++ b/include/net/psample.h @@ -0,0 +1,36 @@ +#ifndef __NET_PSAMPLE_H +#define __NET_PSAMPLE_H + +#include +#include +#include + +struct psample_group { + struct list_head list; + struct net *net; + u32 group_num; + u32 refcount; + u32 seq; +}; + +struct psample_group *psample_group_get(struct net *net, u32 group_num); +void psample_group_put(struct psample_group *group); + +#if IS_ENABLED(CONFIG_PSAMPLE) + +void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, + u32 trunc_size, int in_ifindex, int out_ifindex, + u32 sample_rate); + +#else + +static inline void psample_sample_packet(struct psample_group *group, + struct sk_buff *skb, u32 trunc_size, + int in_ifindex, int out_ifindex, + u32 sample_rate) +{ +} + +#endif + +#endif /* __NET_PSAMPLE_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e600b50be77e..80ad741a42fa 100644 --- 
a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -305,6 +305,7 @@ header-y += netrom.h header-y += net_namespace.h header-y += net_tstamp.h header-y += nfc.h +header-y += psample.h header-y += nfs2.h header-y += nfs3.h header-y += nfs4.h diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h new file mode 100644 index 000000000000..ed48996ec0e8 --- /dev/null +++ b/include/uapi/linux/psample.h @@ -0,0 +1,35 @@ +#ifndef __UAPI_PSAMPLE_H +#define __UAPI_PSAMPLE_H + +enum { + /* sampled packet metadata */ + PSAMPLE_ATTR_IIFINDEX, + PSAMPLE_ATTR_OIFINDEX, + PSAMPLE_ATTR_ORIGSIZE, + PSAMPLE_ATTR_SAMPLE_GROUP, + PSAMPLE_ATTR_GROUP_SEQ, + PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_DATA, + + /* commands attributes */ + PSAMPLE_ATTR_GROUP_REFCOUNT, + + __PSAMPLE_ATTR_MAX +}; + +enum psample_command { + PSAMPLE_CMD_SAMPLE, + PSAMPLE_CMD_GET_GROUP, + PSAMPLE_CMD_NEW_GROUP, + PSAMPLE_CMD_DEL_GROUP, +}; + +/* Can be overridden at runtime by module option */ +#define PSAMPLE_ATTR_MAX (__PSAMPLE_ATTR_MAX - 1) + +#define PSAMPLE_NL_MCGRP_CONFIG_NAME "config" +#define PSAMPLE_NL_MCGRP_SAMPLE_NAME "packets" +#define PSAMPLE_GENL_NAME "psample" +#define PSAMPLE_GENL_VERSION 1 + +#endif diff --git a/net/Kconfig b/net/Kconfig index 92ae1500d9e1..ce4aee69fc0d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -390,6 +390,7 @@ source "net/9p/Kconfig" source "net/caif/Kconfig" source "net/ceph/Kconfig" source "net/nfc/Kconfig" +source "net/psample/Kconfig" config LWTUNNEL bool "Network light weight tunnels" diff --git a/net/Makefile b/net/Makefile index 5d6e0e5ff7f8..7d41de48310e 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ +obj-$(CONFIG_PSAMPLE) += psample/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ diff --git a/net/psample/Kconfig 
b/net/psample/Kconfig new file mode 100644 index 000000000000..d850246a6059 --- /dev/null +++ b/net/psample/Kconfig @@ -0,0 +1,15 @@ +# +# psample packet sampling configuration +# + +menuconfig PSAMPLE + depends on NET + tristate "Packet-sampling netlink channel" + default n + help + Say Y here to add support for packet-sampling netlink channel + This netlink channel allows transferring packets alongside some + metadata to userspace. + + To compile this support as a module, choose M here: the module will + be called psample. diff --git a/net/psample/Makefile b/net/psample/Makefile new file mode 100644 index 000000000000..609b0a79c9f3 --- /dev/null +++ b/net/psample/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the psample netlink channel +# + +obj-$(CONFIG_PSAMPLE) += psample.o diff --git a/net/psample/psample.c b/net/psample/psample.c new file mode 100644 index 000000000000..8aa58a918783 --- /dev/null +++ b/net/psample/psample.c @@ -0,0 +1,301 @@ +/* + * net/psample/psample.c - Netlink channel for packet sampling + * Copyright (c) 2017 Yotam Gigi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PSAMPLE_MAX_PACKET_SIZE 0xffff + +static LIST_HEAD(psample_groups_list); +static DEFINE_SPINLOCK(psample_groups_lock); + +/* multicast groups */ +enum psample_nl_multicast_groups { + PSAMPLE_NL_MCGRP_CONFIG, + PSAMPLE_NL_MCGRP_SAMPLE, +}; + +static const struct genl_multicast_group psample_nl_mcgrps[] = { + [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, +}; + +static struct genl_family psample_nl_family __ro_after_init; + +static int psample_group_nl_fill(struct sk_buff *msg, + struct psample_group *group, + enum psample_command cmd, u32 portid, u32 seq, + int flags) +{ + void *hdr; + int ret; + + hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + ret = nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num); + if (ret < 0) + goto error; + + ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount); + if (ret < 0) + goto error; + + ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq); + if (ret < 0) + goto error; + + genlmsg_end(msg, hdr); + return 0; + +error: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct psample_group *group; + int start = cb->args[0]; + int idx = 0; + int err; + + spin_lock(&psample_groups_lock); + list_for_each_entry(group, &psample_groups_list, list) { + if (!net_eq(group->net, sock_net(msg->sk))) + continue; + if (idx < start) { + idx++; + continue; + } + err = psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); + if (err) + break; + idx++; + } + + spin_unlock(&psample_groups_lock); + cb->args[0] = idx; + return msg->len; +} + +static const struct genl_ops psample_nl_ops[] = 
{ + { + .cmd = PSAMPLE_CMD_GET_GROUP, + .dumpit = psample_nl_cmd_get_group_dumpit, + /* can be retrieved by unprivileged users */ + } +}; + +static struct genl_family psample_nl_family __ro_after_init = { + .name = PSAMPLE_GENL_NAME, + .version = PSAMPLE_GENL_VERSION, + .maxattr = PSAMPLE_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .mcgrps = psample_nl_mcgrps, + .ops = psample_nl_ops, + .n_ops = ARRAY_SIZE(psample_nl_ops), + .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), +}; + +static void psample_group_notify(struct psample_group *group, + enum psample_command cmd) +{ + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!msg) + return; + + err = psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI); + if (!err) + genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0, + PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC); + else + nlmsg_free(msg); +} + +static struct psample_group *psample_group_create(struct net *net, + u32 group_num) +{ + struct psample_group *group; + + group = kzalloc(sizeof(*group), GFP_ATOMIC); + if (!group) + return NULL; + + group->net = net; + group->group_num = group_num; + list_add_tail(&group->list, &psample_groups_list); + + psample_group_notify(group, PSAMPLE_CMD_NEW_GROUP); + return group; +} + +static void psample_group_destroy(struct psample_group *group) +{ + psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); + list_del(&group->list); + kfree(group); +} + +static struct psample_group * +psample_group_lookup(struct net *net, u32 group_num) +{ + struct psample_group *group; + + list_for_each_entry(group, &psample_groups_list, list) + if ((group->group_num == group_num) && (group->net == net)) + return group; + return NULL; +} + +struct psample_group *psample_group_get(struct net *net, u32 group_num) +{ + struct psample_group *group; + + spin_lock(&psample_groups_lock); + + group = psample_group_lookup(net, group_num); + if (!group) { + group = psample_group_create(net, group_num); + if 
(!group) + goto out; + } + group->refcount++; + +out: + spin_unlock(&psample_groups_lock); + return group; +} +EXPORT_SYMBOL_GPL(psample_group_get); + +void psample_group_put(struct psample_group *group) +{ + spin_lock(&psample_groups_lock); + + if (--group->refcount == 0) + psample_group_destroy(group); + + spin_unlock(&psample_groups_lock); +} +EXPORT_SYMBOL_GPL(psample_group_put); + +void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, + u32 trunc_size, int in_ifindex, int out_ifindex, + u32 sample_rate) +{ + struct sk_buff *nl_skb; + int data_len; + int meta_len; + void *data; + int ret; + + meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) + + (out_ifindex ? nla_total_size(sizeof(u16)) : 0) + + nla_total_size(sizeof(u32)) + /* sample_rate */ + nla_total_size(sizeof(u32)) + /* orig_size */ + nla_total_size(sizeof(u32)) + /* group_num */ + nla_total_size(sizeof(u32)); /* seq */ + + data_len = min(skb->len, trunc_size); + if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) + data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN + - NLA_ALIGNTO; + + nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC); + if (unlikely(!nl_skb)) + return; + + data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0, + PSAMPLE_CMD_SAMPLE); + if (unlikely(!data)) + goto error; + + if (in_ifindex) { + ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex); + if (unlikely(ret < 0)) + goto error; + } + + if (out_ifindex) { + ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex); + if (unlikely(ret < 0)) + goto error; + } + + ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate); + if (unlikely(ret < 0)) + goto error; + + ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len); + if (unlikely(ret < 0)) + goto error; + + ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num); + if (unlikely(ret < 0)) + goto error; + + ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++); + if 
(unlikely(ret < 0)) + goto error; + + if (data_len) { + int nla_len = nla_total_size(data_len); + struct nlattr *nla; + + nla = (struct nlattr *)skb_put(nl_skb, nla_len); + nla->nla_type = PSAMPLE_ATTR_DATA; + nla->nla_len = nla_attr_size(data_len); + + if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) + goto error; + } + + genlmsg_end(nl_skb, data); + genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, + PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); + + return; +error: + pr_err_ratelimited("Could not create psample log message\n"); + nlmsg_free(nl_skb); +} +EXPORT_SYMBOL_GPL(psample_sample_packet); + +static int __init psample_module_init(void) +{ + return genl_register_family(&psample_nl_family); +} + +static void __exit psample_module_exit(void) +{ + genl_unregister_family(&psample_nl_family); +} + +module_init(psample_module_init); +module_exit(psample_module_exit); + +MODULE_AUTHOR("Yotam Gigi "); +MODULE_DESCRIPTION("netlink channel for packet sampling"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 5c5670fae43027778e84b9d9ff3b9d91a10a8131 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 23 Jan 2017 11:07:09 +0100 Subject: net/sched: Introduce sample tc action This action allows the user to sample traffic matched by tc classifier. The sampling consists of choosing packets randomly and sampling them using the psample module. The user can configure the psample group number, the sampling rate and the packet's truncation (to save kernel-user traffic). Example: To sample ingress traffic from interface eth1, one may use the commands: tc qdisc add dev eth1 handle ffff: ingress tc filter add dev eth1 parent ffff: \ matchall action sample rate 12 group 4 Where the first command adds an ingress qdisc and the second starts sampling randomly with an average of one sampled packet per 12 packets on dev eth1 to psample group 4. Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_sample.h | 50 +++++++ include/uapi/linux/tc_act/Kbuild | 1 + include/uapi/linux/tc_act/tc_sample.h | 26 ++++ net/sched/Kconfig | 12 ++ net/sched/Makefile | 1 + net/sched/act_sample.c | 274 ++++++++++++++++++++++++++++++++++ 6 files changed, 364 insertions(+) create mode 100644 include/net/tc_act/tc_sample.h create mode 100644 include/uapi/linux/tc_act/tc_sample.h create mode 100644 net/sched/act_sample.c (limited to 'include/net') diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h new file mode 100644 index 000000000000..89e9305be880 --- /dev/null +++ b/include/net/tc_act/tc_sample.h @@ -0,0 +1,50 @@ +#ifndef __NET_TC_SAMPLE_H +#define __NET_TC_SAMPLE_H + +#include +#include +#include + +struct tcf_sample { + struct tc_action common; + u32 rate; + bool truncate; + u32 trunc_size; + struct psample_group __rcu *psample_group; + u32 psample_group_num; + struct list_head tcfm_list; + struct rcu_head rcu; +}; +#define to_sample(a) ((struct tcf_sample *)a) + +static inline bool is_tcf_sample(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + return a->ops && a->ops->type == TCA_ACT_SAMPLE; +#else + return false; +#endif +} + +static inline __u32 tcf_sample_rate(const struct tc_action *a) +{ + return to_sample(a)->rate; +} + +static inline bool tcf_sample_truncate(const struct tc_action *a) +{ + return to_sample(a)->truncate; +} + +static inline int tcf_sample_trunc_size(const struct tc_action *a) +{ + return to_sample(a)->trunc_size; +} + +static inline struct psample_group * +tcf_sample_psample_group(const struct tc_action *a) +{ + return rcu_dereference(to_sample(a)->psample_group); +} + +#endif /* __NET_TC_SAMPLE_H */ diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index e3db7403296f..ba62ddf0e58a 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -4,6 +4,7 @@ header-y += tc_defact.h header-y += tc_gact.h header-y += tc_ipt.h 
header-y += tc_mirred.h +header-y += tc_sample.h header-y += tc_nat.h header-y += tc_pedit.h header-y += tc_skbedit.h diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h new file mode 100644 index 000000000000..edc9058bb30d --- /dev/null +++ b/include/uapi/linux/tc_act/tc_sample.h @@ -0,0 +1,26 @@ +#ifndef __LINUX_TC_SAMPLE_H +#define __LINUX_TC_SAMPLE_H + +#include +#include +#include + +#define TCA_ACT_SAMPLE 26 + +struct tc_sample { + tc_gen; +}; + +enum { + TCA_SAMPLE_UNSPEC, + TCA_SAMPLE_TM, + TCA_SAMPLE_PARMS, + TCA_SAMPLE_RATE, + TCA_SAMPLE_TRUNC_SIZE, + TCA_SAMPLE_PSAMPLE_GROUP, + TCA_SAMPLE_PAD, + __TCA_SAMPLE_MAX +}; +#define TCA_SAMPLE_MAX (__TCA_SAMPLE_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a9aa38d43fa7..72cfa3a6bac0 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -650,6 +650,18 @@ config NET_ACT_MIRRED To compile this code as a module, choose M here: the module will be called act_mirred. +config NET_ACT_SAMPLE + tristate "Traffic Sampling" + depends on NET_CLS_ACT + select PSAMPLE + ---help--- + Say Y here to allow packet sampling tc action. The packet sample + action consists of statistically choosing packets and sampling + them using the psample module. + + To compile this code as a module, choose M here: the + module will be called act_sample. 
+ config NET_ACT_IPT tristate "IPtables targets" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES diff --git a/net/sched/Makefile b/net/sched/Makefile index 4bdda3634e0b..7b915d226de7 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o +obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c new file mode 100644 index 000000000000..39229756de07 --- /dev/null +++ b/net/sched/act_sample.c @@ -0,0 +1,274 @@ +/* + * net/sched/act_sample.c - Packet sampling tc action + * Copyright (c) 2017 Yotam Gigi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SAMPLE_TAB_MASK 7 +static unsigned int sample_net_id; +static struct tc_action_ops act_sample_ops; + +static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { + [TCA_SAMPLE_PARMS] = { .len = sizeof(struct tc_sample) }, + [TCA_SAMPLE_RATE] = { .type = NLA_U32 }, + [TCA_SAMPLE_TRUNC_SIZE] = { .type = NLA_U32 }, + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NLA_U32 }, +}; + +/* + * Create a sample action, or update an existing one, from the nested netlink + * attributes in @nla. TCA_SAMPLE_PARMS, TCA_SAMPLE_RATE (non-zero) and + * TCA_SAMPLE_PSAMPLE_GROUP are mandatory; TCA_SAMPLE_TRUNC_SIZE is optional. + * Takes a reference on the psample group for the requested group number. + * Returns ACT_P_CREATED when a new action was created and inserted, 0 when + * only binding to an existing action, or a negative errno on failure. + */ +static int tcf_sample_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, int ovr, + int bind) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + struct nlattr *tb[TCA_SAMPLE_MAX + 1]; + struct psample_group *psample_group; + struct tc_sample *parm; + struct tcf_sample *s; + bool exists = false; + int ret; + + if (!nla) + return -EINVAL; + ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy); + if (ret < 0) + return ret; + if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || + !tb[TCA_SAMPLE_PSAMPLE_GROUP]) + return -EINVAL; + + /* tcf_sample_act() computes prandom_u32() % rate, so a zero rate + * would divide by zero on the datapath; reject it up front. + */ + if (!nla_get_u32(tb[TCA_SAMPLE_RATE])) + return -EINVAL; + + parm = nla_data(tb[TCA_SAMPLE_PARMS]); + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + s = to_sample(*a); + + ASSERT_RTNL(); + s->tcf_action = parm->action; + s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); + psample_group = psample_group_get(net, s->psample_group_num); + if (!psample_group) { + /* A freshly created action has not been inserted yet, so + * nothing else will free it; do not leak it on this path. + */ + if (ret == ACT_P_CREATED) + tcf_hash_cleanup(*a, est); + return -ENOMEM; + } + RCU_INIT_POINTER(s->psample_group, psample_group); + + if (tb[TCA_SAMPLE_TRUNC_SIZE]) { + s->truncate = true; + s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); + } + + if (ret == 
ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) +{ + struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); + struct psample_group *psample_group; + + psample_group = rcu_dereference_protected(s->psample_group, 1); + RCU_INIT_POINTER(s->psample_group, NULL); + psample_group_put(psample_group); +} + +static void tcf_sample_cleanup(struct tc_action *a, int bind) +{ + struct tcf_sample *s = to_sample(a); + + call_rcu(&s->rcu, tcf_sample_cleanup_rcu); +} + +static bool tcf_sample_dev_ok_push(struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + default: + return true; + } +} + +static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *psample_group; + int retval; + int size; + int iif; + int oif; + + tcf_lastuse_update(&s->tcf_tm); + bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); + retval = READ_ONCE(s->tcf_action); + + rcu_read_lock(); + psample_group = rcu_dereference(s->psample_group); + + /* randomly sample packets according to rate */ + if (psample_group && (prandom_u32() % s->rate == 0)) { + if (!skb_at_tc_ingress(skb)) { + iif = skb->skb_iif; + oif = skb->dev->ifindex; + } else { + iif = skb->dev->ifindex; + oif = 0; + } + + /* on ingress, the mac header gets popped, so push it back */ + if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) + skb_push(skb, skb->mac_len); + + size = s->truncate ? 
s->trunc_size : skb->len; + psample_sample_packet(psample_group, skb, size, iif, oif, + s->rate); + + if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) + skb_pull(skb, skb->mac_len); + } + + rcu_read_unlock(); + return retval; +} + +static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_sample *s = to_sample(a); + struct tc_sample opt = { + .index = s->tcf_index, + .action = s->tcf_action, + .refcnt = s->tcf_refcnt - ref, + .bindcnt = s->tcf_bindcnt - bind, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_SAMPLE_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + tcf_tm_dump(&t, &s->tcf_tm); + if (nla_put_64bit(skb, TCA_SAMPLE_TM, sizeof(t), &t, TCA_SAMPLE_PAD)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_SAMPLE_RATE, s->rate)) + goto nla_put_failure; + + if (s->truncate) + if (nla_put_u32(skb, TCA_SAMPLE_TRUNC_SIZE, s->trunc_size)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_SAMPLE_PSAMPLE_GROUP, s->psample_group_num)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_sample_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_sample_ops = { + .kind = "sample", + .type = TCA_ACT_SAMPLE, + .owner = THIS_MODULE, + .act = tcf_sample_act, + .dump = tcf_sample_dump, + .init = tcf_sample_init, + .cleanup = tcf_sample_cleanup, + .walk = tcf_sample_walker, + .lookup = tcf_sample_search, + .size = sizeof(struct tcf_sample), +}; + +static __net_init int 
sample_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); +} + +static void __net_exit sample_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations sample_net_ops = { + .init = sample_init_net, + .exit = sample_exit_net, + .id = &sample_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init sample_init_module(void) +{ + return tcf_register_action(&act_sample_ops, &sample_net_ops); +} + +static void __exit sample_cleanup_module(void) +{ + tcf_unregister_action(&act_sample_ops, &sample_net_ops); +} + +module_init(sample_init_module); +module_exit(sample_cleanup_module); + +MODULE_AUTHOR("Yotam Gigi "); +MODULE_DESCRIPTION("Packet sampling action"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 42f82e2e62ae748a27741e63dbb035bfbe3353c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Jan 2017 15:36:57 +0100 Subject: wireless: radiotap: rewrite the radiotap header file The header file has grown a lot of #define's etc, but they are nicer as enums, so rewrite the file from the documentation as such. Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 455 ++++++++++++++------------------------- 1 file changed, 157 insertions(+), 298 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index d0e7e3f8e67a..d91f9e7f4d71 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,201 +1,54 @@ /* - * Copyright (c) 2003, 2004 David Young. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of David Young may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID - * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. - */ - -/* - * Modifications to fit into the linux IEEE 802.11 stack, - * Mike Kershaw (dragorn@kismetwireless.net) + * Copyright (c) 2017 Intel Deutschland GmbH + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifndef __RADIOTAP_H +#define __RADIOTAP_H -#ifndef IEEE80211RADIOTAP_H -#define IEEE80211RADIOTAP_H - -#include #include #include -/* Base version of the radiotap packet header data */ -#define PKTHDR_RADIOTAP_VERSION 0 - -/* A generic radio capture format is desirable. There is one for - * Linux, but it is neither rigidly defined (there were not even - * units given for some fields) nor easily extensible. - * - * I suggest the following extensible radio capture format. It is - * based on a bitmap indicating which fields are present. - * - * I am trying to describe precisely what the application programmer - * should expect in the following, and for that reason I tell the - * units and origin of each measurement (where it applies), or else I - * use sufficiently weaselly language ("is a monotonically nondecreasing - * function of...") that I cannot set false expectations for lawyerly - * readers. - */ - -/* - * The radio capture header precedes the 802.11 header. - * All data in the header is little endian on all platforms. +/** + * struct ieee80211_radiotap_header - base radiotap header */ struct ieee80211_radiotap_header { - u8 it_version; /* Version 0. Only increases - * for drastic changes, - * introduction of compatible - * new fields does not count. - */ - u8 it_pad; - __le16 it_len; /* length of the whole - * header in bytes, including - * it_version, it_pad, - * it_len, and data fields. - */ - __le32 it_present; /* A bitmap telling which - * fields are present. Set bit 31 - * (0x80000000) to extend the - * bitmap by another 32 bits. - * Additional extensions are made - * by setting bit 31. 
- */ + /** + * @it_version: radiotap version, always 0 + */ + uint8_t it_version; + + /** + * @it_pad: padding (or alignment) + */ + uint8_t it_pad; + + /** + * @it_len: overall radiotap header length + */ + __le16 it_len; + + /** + * @it_present: (first) present word + */ + __le32 it_present; } __packed; -/* Name Data type Units - * ---- --------- ----- - * - * IEEE80211_RADIOTAP_TSFT __le64 microseconds - * - * Value in microseconds of the MAC's 64-bit 802.11 Time - * Synchronization Function timer when the first bit of the - * MPDU arrived at the MAC. For received frames, only. - * - * IEEE80211_RADIOTAP_CHANNEL 2 x __le16 MHz, bitmap - * - * Tx/Rx frequency in MHz, followed by flags (see below). - * - * IEEE80211_RADIOTAP_FHSS __le16 see below - * - * For frequency-hopping radios, the hop set (first byte) - * and pattern (second byte). - * - * IEEE80211_RADIOTAP_RATE u8 500kb/s - * - * Tx/Rx data rate - * - * IEEE80211_RADIOTAP_DBM_ANTSIGNAL s8 decibels from - * one milliwatt (dBm) - * - * RF signal power at the antenna, decibel difference from - * one milliwatt. - * - * IEEE80211_RADIOTAP_DBM_ANTNOISE s8 decibels from - * one milliwatt (dBm) - * - * RF noise power at the antenna, decibel difference from one - * milliwatt. - * - * IEEE80211_RADIOTAP_DB_ANTSIGNAL u8 decibel (dB) - * - * RF signal power at the antenna, decibel difference from an - * arbitrary, fixed reference. - * - * IEEE80211_RADIOTAP_DB_ANTNOISE u8 decibel (dB) - * - * RF noise power at the antenna, decibel difference from an - * arbitrary, fixed reference point. - * - * IEEE80211_RADIOTAP_LOCK_QUALITY __le16 unitless - * - * Quality of Barker code lock. Unitless. Monotonically - * nondecreasing with "better" lock strength. Called "Signal - * Quality" in datasheets. (Is there a standard way to measure - * this?) - * - * IEEE80211_RADIOTAP_TX_ATTENUATION __le16 unitless - * - * Transmit power expressed as unitless distance from max - * power set at factory calibration. 0 is max power. 
- * Monotonically nondecreasing with lower power levels. - * - * IEEE80211_RADIOTAP_DB_TX_ATTENUATION __le16 decibels (dB) - * - * Transmit power expressed as decibel distance from max power - * set at factory calibration. 0 is max power. Monotonically - * nondecreasing with lower power levels. - * - * IEEE80211_RADIOTAP_DBM_TX_POWER s8 decibels from - * one milliwatt (dBm) - * - * Transmit power expressed as dBm (decibels from a 1 milliwatt - * reference). This is the absolute power level measured at - * the antenna port. - * - * IEEE80211_RADIOTAP_FLAGS u8 bitmap - * - * Properties of transmitted and received frames. See flags - * defined below. - * - * IEEE80211_RADIOTAP_ANTENNA u8 antenna index - * - * Unitless indication of the Rx/Tx antenna for this packet. - * The first antenna is antenna 0. - * - * IEEE80211_RADIOTAP_RX_FLAGS __le16 bitmap - * - * Properties of received frames. See flags defined below. - * - * IEEE80211_RADIOTAP_TX_FLAGS __le16 bitmap - * - * Properties of transmitted frames. See flags defined below. - * - * IEEE80211_RADIOTAP_RTS_RETRIES u8 data - * - * Number of rts retries a transmitted frame used. - * - * IEEE80211_RADIOTAP_DATA_RETRIES u8 data - * - * Number of unicast retries a transmitted frame used. - * - * IEEE80211_RADIOTAP_MCS u8, u8, u8 unitless - * - * Contains a bitmap of known fields/flags, the flags, and - * the MCS index. - * - * IEEE80211_RADIOTAP_AMPDU_STATUS u32, u16, u8, u8 unitless - * - * Contains the AMPDU information for the subframe. - * - * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 - * - * Contains VHT information about this frame. - * - * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable - * - * Contains timestamp information for this frame. 
- */ -enum ieee80211_radiotap_type { +/* version is always 0 */ +#define PKTHDR_RADIOTAP_VERSION 0 + +/* see the radiotap website for the descriptions */ +enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_TSFT = 0, IEEE80211_RADIOTAP_FLAGS = 1, IEEE80211_RADIOTAP_RATE = 2, @@ -214,7 +67,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TX_FLAGS = 15, IEEE80211_RADIOTAP_RTS_RETRIES = 16, IEEE80211_RADIOTAP_DATA_RETRIES = 17, - + /* 18 is XChannel, but it's not defined yet */ IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, @@ -226,129 +79,135 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_EXT = 31 }; -/* Channel flags. */ -#define IEEE80211_CHAN_TURBO 0x0010 /* Turbo channel */ -#define IEEE80211_CHAN_CCK 0x0020 /* CCK channel */ -#define IEEE80211_CHAN_OFDM 0x0040 /* OFDM channel */ -#define IEEE80211_CHAN_2GHZ 0x0080 /* 2 GHz spectrum channel. */ -#define IEEE80211_CHAN_5GHZ 0x0100 /* 5 GHz spectrum channel */ -#define IEEE80211_CHAN_PASSIVE 0x0200 /* Only passive scan allowed */ -#define IEEE80211_CHAN_DYN 0x0400 /* Dynamic CCK-OFDM channel */ -#define IEEE80211_CHAN_GFSK 0x0800 /* GFSK channel (FHSS PHY) */ -#define IEEE80211_CHAN_GSM 0x1000 /* GSM (900 MHz) */ -#define IEEE80211_CHAN_STURBO 0x2000 /* Static Turbo */ -#define IEEE80211_CHAN_HALF 0x4000 /* Half channel (10 MHz wide) */ -#define IEEE80211_CHAN_QUARTER 0x8000 /* Quarter channel (5 MHz wide) */ - -/* For IEEE80211_RADIOTAP_FLAGS */ -#define IEEE80211_RADIOTAP_F_CFP 0x01 /* sent/received - * during CFP - */ -#define IEEE80211_RADIOTAP_F_SHORTPRE 0x02 /* sent/received - * with short - * preamble - */ -#define IEEE80211_RADIOTAP_F_WEP 0x04 /* sent/received - * with WEP encryption - */ -#define IEEE80211_RADIOTAP_F_FRAG 0x08 /* sent/received - * with fragmentation - */ -#define IEEE80211_RADIOTAP_F_FCS 0x10 /* frame includes FCS */ -#define IEEE80211_RADIOTAP_F_DATAPAD 0x20 /* frame has padding between - * 802.11 header and payload - * (to 
32-bit boundary) - */ -#define IEEE80211_RADIOTAP_F_BADFCS 0x40 /* bad FCS */ - -/* For IEEE80211_RADIOTAP_RX_FLAGS */ -#define IEEE80211_RADIOTAP_F_RX_BADPLCP 0x0002 /* frame has bad PLCP */ +/* for IEEE80211_RADIOTAP_FLAGS */ +enum ieee80211_radiotap_flags { + IEEE80211_RADIOTAP_F_CFP = 0x01, + IEEE80211_RADIOTAP_F_SHORTPRE = 0x02, + IEEE80211_RADIOTAP_F_WEP = 0x04, + IEEE80211_RADIOTAP_F_FRAG = 0x08, + IEEE80211_RADIOTAP_F_FCS = 0x10, + IEEE80211_RADIOTAP_F_DATAPAD = 0x20, + IEEE80211_RADIOTAP_F_BADFCS = 0x40, +}; -/* For IEEE80211_RADIOTAP_TX_FLAGS */ -#define IEEE80211_RADIOTAP_F_TX_FAIL 0x0001 /* failed due to excessive - * retries */ -#define IEEE80211_RADIOTAP_F_TX_CTS 0x0002 /* used cts 'protection' */ -#define IEEE80211_RADIOTAP_F_TX_RTS 0x0004 /* used rts/cts handshake */ -#define IEEE80211_RADIOTAP_F_TX_NOACK 0x0008 /* don't expect an ack */ +/* for IEEE80211_RADIOTAP_CHANNEL */ +enum ieee80211_radiotap_channel_flags { + IEEE80211_CHAN_CCK = 0x0020, + IEEE80211_CHAN_OFDM = 0x0040, + IEEE80211_CHAN_2GHZ = 0x0080, + IEEE80211_CHAN_5GHZ = 0x0100, + IEEE80211_CHAN_DYN = 0x0400, + IEEE80211_CHAN_HALF = 0x4000, + IEEE80211_CHAN_QUARTER = 0x8000, +}; +/* for IEEE80211_RADIOTAP_RX_FLAGS */ +enum ieee80211_radiotap_rx_flags { + IEEE80211_RADIOTAP_F_RX_BADPLCP = 0x0002, +}; -/* For IEEE80211_RADIOTAP_MCS */ -#define IEEE80211_RADIOTAP_MCS_HAVE_BW 0x01 -#define IEEE80211_RADIOTAP_MCS_HAVE_MCS 0x02 -#define IEEE80211_RADIOTAP_MCS_HAVE_GI 0x04 -#define IEEE80211_RADIOTAP_MCS_HAVE_FMT 0x08 -#define IEEE80211_RADIOTAP_MCS_HAVE_FEC 0x10 -#define IEEE80211_RADIOTAP_MCS_HAVE_STBC 0x20 +/* for IEEE80211_RADIOTAP_TX_FLAGS */ +enum ieee80211_radiotap_tx_flags { + IEEE80211_RADIOTAP_F_TX_FAIL = 0x0001, + IEEE80211_RADIOTAP_F_TX_CTS = 0x0002, + IEEE80211_RADIOTAP_F_TX_RTS = 0x0004, + IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008, +}; -#define IEEE80211_RADIOTAP_MCS_BW_MASK 0x03 -#define IEEE80211_RADIOTAP_MCS_BW_20 0 -#define IEEE80211_RADIOTAP_MCS_BW_40 1 -#define 
IEEE80211_RADIOTAP_MCS_BW_20L 2 -#define IEEE80211_RADIOTAP_MCS_BW_20U 3 -#define IEEE80211_RADIOTAP_MCS_SGI 0x04 -#define IEEE80211_RADIOTAP_MCS_FMT_GF 0x08 -#define IEEE80211_RADIOTAP_MCS_FEC_LDPC 0x10 -#define IEEE80211_RADIOTAP_MCS_STBC_MASK 0x60 -#define IEEE80211_RADIOTAP_MCS_STBC_1 1 -#define IEEE80211_RADIOTAP_MCS_STBC_2 2 -#define IEEE80211_RADIOTAP_MCS_STBC_3 3 +/* for IEEE80211_RADIOTAP_MCS "have" flags */ +enum ieee80211_radiotap_mcs_have { + IEEE80211_RADIOTAP_MCS_HAVE_BW = 0x01, + IEEE80211_RADIOTAP_MCS_HAVE_MCS = 0x02, + IEEE80211_RADIOTAP_MCS_HAVE_GI = 0x04, + IEEE80211_RADIOTAP_MCS_HAVE_FMT = 0x08, + IEEE80211_RADIOTAP_MCS_HAVE_FEC = 0x10, + IEEE80211_RADIOTAP_MCS_HAVE_STBC = 0x20, +}; -#define IEEE80211_RADIOTAP_MCS_STBC_SHIFT 5 +enum ieee80211_radiotap_mcs_flags { + IEEE80211_RADIOTAP_MCS_BW_MASK = 0x03, + IEEE80211_RADIOTAP_MCS_BW_20 = 0, + IEEE80211_RADIOTAP_MCS_BW_40 = 1, + IEEE80211_RADIOTAP_MCS_BW_20L = 2, + IEEE80211_RADIOTAP_MCS_BW_20U = 3, + + IEEE80211_RADIOTAP_MCS_SGI = 0x04, + IEEE80211_RADIOTAP_MCS_FMT_GF = 0x08, + IEEE80211_RADIOTAP_MCS_FEC_LDPC = 0x10, + IEEE80211_RADIOTAP_MCS_STBC_MASK = 0x60, + IEEE80211_RADIOTAP_MCS_STBC_1 = 1, + IEEE80211_RADIOTAP_MCS_STBC_2 = 2, + IEEE80211_RADIOTAP_MCS_STBC_3 = 3, + IEEE80211_RADIOTAP_MCS_STBC_SHIFT = 5, +}; -/* For IEEE80211_RADIOTAP_AMPDU_STATUS */ -#define IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN 0x0001 -#define IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN 0x0002 -#define IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN 0x0004 -#define IEEE80211_RADIOTAP_AMPDU_IS_LAST 0x0008 -#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR 0x0010 -#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN 0x0020 +/* for IEEE80211_RADIOTAP_AMPDU_STATUS */ +enum ieee80211_radiotap_ampdu_flags { + IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN = 0x0001, + IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN = 0x0002, + IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN = 0x0004, + IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008, + IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010, + 
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020, +}; -/* For IEEE80211_RADIOTAP_VHT */ -#define IEEE80211_RADIOTAP_VHT_KNOWN_STBC 0x0001 -#define IEEE80211_RADIOTAP_VHT_KNOWN_TXOP_PS_NA 0x0002 -#define IEEE80211_RADIOTAP_VHT_KNOWN_GI 0x0004 -#define IEEE80211_RADIOTAP_VHT_KNOWN_SGI_NSYM_DIS 0x0008 -#define IEEE80211_RADIOTAP_VHT_KNOWN_LDPC_EXTRA_OFDM_SYM 0x0010 -#define IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED 0x0020 -#define IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH 0x0040 -#define IEEE80211_RADIOTAP_VHT_KNOWN_GROUP_ID 0x0080 -#define IEEE80211_RADIOTAP_VHT_KNOWN_PARTIAL_AID 0x0100 +/* for IEEE80211_RADIOTAP_VHT */ +enum ieee80211_radiotap_vht_known { + IEEE80211_RADIOTAP_VHT_KNOWN_STBC = 0x0001, + IEEE80211_RADIOTAP_VHT_KNOWN_TXOP_PS_NA = 0x0002, + IEEE80211_RADIOTAP_VHT_KNOWN_GI = 0x0004, + IEEE80211_RADIOTAP_VHT_KNOWN_SGI_NSYM_DIS = 0x0008, + IEEE80211_RADIOTAP_VHT_KNOWN_LDPC_EXTRA_OFDM_SYM = 0x0010, + IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED = 0x0020, + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH = 0x0040, + IEEE80211_RADIOTAP_VHT_KNOWN_GROUP_ID = 0x0080, + IEEE80211_RADIOTAP_VHT_KNOWN_PARTIAL_AID = 0x0100, +}; -#define IEEE80211_RADIOTAP_VHT_FLAG_STBC 0x01 -#define IEEE80211_RADIOTAP_VHT_FLAG_TXOP_PS_NA 0x02 -#define IEEE80211_RADIOTAP_VHT_FLAG_SGI 0x04 -#define IEEE80211_RADIOTAP_VHT_FLAG_SGI_NSYM_M10_9 0x08 -#define IEEE80211_RADIOTAP_VHT_FLAG_LDPC_EXTRA_OFDM_SYM 0x10 -#define IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED 0x20 +enum ieee80211_radiotap_vht_flags { + IEEE80211_RADIOTAP_VHT_FLAG_STBC = 0x01, + IEEE80211_RADIOTAP_VHT_FLAG_TXOP_PS_NA = 0x02, + IEEE80211_RADIOTAP_VHT_FLAG_SGI = 0x04, + IEEE80211_RADIOTAP_VHT_FLAG_SGI_NSYM_M10_9 = 0x08, + IEEE80211_RADIOTAP_VHT_FLAG_LDPC_EXTRA_OFDM_SYM = 0x10, + IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED = 0x20, +}; -#define IEEE80211_RADIOTAP_CODING_LDPC_USER0 0x01 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER1 0x02 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +enum 
ieee80211_radiotap_vht_coding { + IEEE80211_RADIOTAP_CODING_LDPC_USER0 = 0x01, + IEEE80211_RADIOTAP_CODING_LDPC_USER1 = 0x02, + IEEE80211_RADIOTAP_CODING_LDPC_USER2 = 0x04, + IEEE80211_RADIOTAP_CODING_LDPC_USER3 = 0x08, +}; -/* For IEEE80211_RADIOTAP_TIMESTAMP */ -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 +/* for IEEE80211_RADIOTAP_TIMESTAMP */ +enum ieee80211_radiotap_timestamp_unit_spos { + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK = 0x000F, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS = 0x0000, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US = 0x0001, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS = 0x0003, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK = 0x00F0, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU = 0x0000, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ = 0x0010, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU = 0x0020, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU = 0x0030, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN = 0x00F0, +}; -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 +enum ieee80211_radiotap_timestamp_flags { + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT = 0x00, + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT = 0x01, + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY = 0x02, +}; -/* helpers */ -static inline int ieee80211_get_radiotap_len(unsigned char *data) +/** + * ieee80211_get_radiotap_len - get radiotap header length + */ +static inline u16 
ieee80211_get_radiotap_len(const char *data) { - struct ieee80211_radiotap_header *hdr = - (struct ieee80211_radiotap_header *)data; + struct ieee80211_radiotap_header *hdr = (void *)data; return get_unaligned_le16(&hdr->it_len); } -#endif /* IEEE80211_RADIOTAP_H */ +#endif /* __RADIOTAP_H */ -- cgit v1.2.3 From 1045ba77a5962a22bce7777678ef46714107ea63 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 24 Jan 2017 07:02:41 -0500 Subject: net sched actions: Add support for user cookies Introduce optional 128-bit action cookie. Like all other cookie schemes in the networking world (eg in protocols like http or existing kernel fib protocol field, etc) the idea is to save user state that when retrieved serves as a correlator. The kernel _should not_ interpret it. The user can store whatever they wish in the 128 bits. Sample exercise(showing variable length use of cookie) .. create an accept action with cookie a1b2c3d4 sudo $TC actions add action ok index 1 cookie a1b2c3d4 .. dump all gact actions.. sudo $TC -s actions ls action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 0 installed 5 sec used 5 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 cookie a1b2c3d4 .. bind the accept action to a filter.. sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 1 ... send some traffic.. $ ping 127.0.0.1 -c 3 PING 127.0.0.1 (127.0.0.1) 56(84) bytes of data. 64 bytes from 127.0.0.1: icmp_seq=1 ttl=64 time=0.020 ms 64 bytes from 127.0.0.1: icmp_seq=2 ttl=64 time=0.027 ms 64 bytes from 127.0.0.1: icmp_seq=3 ttl=64 time=0.038 ms Signed-off-by: David S. 
Miller --- include/net/act_api.h | 1 + include/net/pkt_cls.h | 8 ++++++++ include/uapi/linux/pkt_cls.h | 3 +++ net/sched/act_api.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 1d716449209e..cfa2ae33da9a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -41,6 +41,7 @@ struct tc_action { struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; + struct tc_cookie *act_cookie; }; #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f0a051480c6c..b43077e47d35 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -515,4 +515,12 @@ struct tc_cls_bpf_offload { u32 gen_flags; }; + +/* This structure holds cookie structure that is passed from user + * to the kernel for actions and classifiers + */ +struct tc_cookie { + u8 *data; + u32 len; +}; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index fd373ebd5a44..345551e71410 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,6 +4,8 @@ #include #include +#define TC_COOKIE_MAX_SIZE 16 + /* Action attributes */ enum { TCA_ACT_UNSPEC, @@ -12,6 +14,7 @@ enum { TCA_ACT_INDEX, TCA_ACT_STATS, TCA_ACT_PAD, + TCA_ACT_COOKIE, __TCA_ACT_MAX }; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cd08df91351d..3c5e29ba6594 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,12 @@ static void free_tcf(struct rcu_head *head) free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); + + if (p->act_cookie) { + kfree(p->act_cookie->data); + kfree(p->act_cookie); + } + kfree(p); } @@ -475,6 +482,12 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) goto 
nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; + if (a->act_cookie) { + if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len, + a->act_cookie->data)) + goto nla_put_failure; + } + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -516,6 +529,22 @@ errout: return err; } +int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) +{ + a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL); + if (!a->act_cookie) + return -ENOMEM; + + a->act_cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); + if (!a->act_cookie->data) { + kfree(a->act_cookie); + return -ENOMEM; + } + a->act_cookie->len = nla_len(tb[TCA_ACT_COOKIE]); + + return 0; +} + struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind) @@ -575,6 +604,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (err < 0) goto err_mod; + if (tb[TCA_ACT_COOKIE]) { + int cklen = nla_len(tb[TCA_ACT_COOKIE]); + + if (cklen > TC_COOKIE_MAX_SIZE) { + err = -EINVAL; + tcf_hash_release(a, bind); + goto err_mod; + } + + err = nla_memdup_cookie(a, tb); + if (err < 0) { + tcf_hash_release(a, bind); + goto err_mod; + } + } + /* module count goes up only when brand new policy is created * if it exists and is only bound to in a_o->init() then * ACT_P_CREATED is not returned (a zero is). -- cgit v1.2.3 From 065263f40f0972d5f1cd294bb0242bd5aa5f06b2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 23 Jan 2017 10:59:20 -0800 Subject: net/tcp-fastopen: refactor cookie check logic Refactor the cookie check logic in tcp_send_syn_data() into a function. This function will be called else where in later changes. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_fastopen.c | 21 +++++++++++++++++++++ net/ipv4/tcp_output.c | 16 ++-------------- 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c55d65f74f7f..de67541d7adf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1493,6 +1493,8 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie *foc, struct dst_entry *dst); void tcp_fastopen_init_key_once(bool publish); +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f51919535ca7..f90e09e1ff4c 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -325,3 +325,24 @@ fastopen: *foc = valid_foc; return NULL; } + +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) +{ + unsigned long last_syn_loss = 0; + int syn_loss = 0; + + tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); + + /* Recurring FO SYN losses: no cookie or data in SYN */ + if (syn_loss > 1 && + time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { + cookie->len = -1; + return false; + } + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + cookie->len = -1; + return true; + } + return cookie->len > 0; +} diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a1a1494b9dd..671c69535671 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3267,23 +3267,11 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; - unsigned long last_syn_loss = 0; + int space, err = 0; struct sk_buff *syn_data; tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ - 
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, - &syn_loss, &last_syn_loss); - /* Recurring FO SYN losses: revert to regular handshake temporarily */ - if (syn_loss > 1 && - time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { - fo->cookie.len = -1; - goto fallback; - } - - if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) - fo->cookie.len = -1; - else if (fo->cookie.len <= 0) + if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and -- cgit v1.2.3 From 19f6d3f3c8422d65b5e3d2162e30ef07c6e21ea2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 23 Jan 2017 10:59:22 -0800 Subject: net/tcp-fastopen: Add new API support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an alternative way to perform Fast Open on the active side (client). Prior to this patch, a client needs to replace the connect() call with sendto(MSG_FASTOPEN). This can be cumbersome for applications who want to use Fast Open: these socket operations are often done in lower layer libraries used by many other applications. Changing these libraries and/or the socket call sequences are not trivial. A more convenient approach is to perform Fast Open by simply enabling a socket option when the socket is created w/o changing other socket calls sequence: s = socket() create a new socket setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …); newly introduced sockopt If set, new functionality described below will be used. Return ENOTSUPP if TFO is not supported or not enabled in the kernel. connect() With cookie present, return 0 immediately. With no cookie, initiate 3WHS with TFO cookie-request option and return -1 with errno = EINPROGRESS. write()/sendmsg() With cookie present, send out SYN with data and return the number of bytes buffered. 
With no cookie, and 3WHS not yet completed, return -1 with errno = EINPROGRESS. No MSG_FASTOPEN flag is needed. read() Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but write() is not called yet. Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is established but no msg is received yet. Return number of bytes read if socket is established and there is msg received. The new API simplifies life for applications that always perform a write() immediately after a successful connect(). Such applications can now take advantage of Fast Open by merely making one new setsockopt() call at the time of creating the socket. Nothing else about the application's socket call sequence needs to change. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/inet_sock.h | 6 +++++- include/net/tcp.h | 1 + include/uapi/linux/tcp.h | 1 + net/ipv4/af_inet.c | 31 ++++++++++++++++++++++++------- net/ipv4/tcp.c | 35 ++++++++++++++++++++++++++++++++++- net/ipv4/tcp_fastopen.c | 33 +++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 7 ++++++- net/ipv6/tcp_ipv6.c | 5 +++++ 9 files changed, 111 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5371b3d70cfe..f88f4649ba6f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -222,7 +222,8 @@ struct tcp_sock { u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:5; + fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ + unused:4; u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c9cff977a7fb..aa95053dfc78 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -206,7 +206,11 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; - __u8 bind_address_no_port:1; + __u8 bind_address_no_port:1, + defer_connect:1; /* Indicates that fastopen_connect is set + * and cookie exists so we defer connect + * until first data frame is written + */ __u8 rcv_tos; __u8 convert_csum; int uc_index; diff --git a/include/net/tcp.h b/include/net/tcp.h index de67541d7adf..6ec4ea652f3f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1495,6 +1495,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, void tcp_fastopen_init_key_once(bool publish); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); +bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c53de2691cec..6ff35eb48d10 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -116,6 +116,7 @@ enum { #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 28fe8da4e1ac..92e7f3e957fa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; long timeo; - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; + /* + * uaddr can be NULL and addr_len can be 0 if: + * sk is a TCP fastopen active socket and + * 
TCP_FASTOPEN_CONNECT sockopt is set and + * we already have a valid cookie for this socket. + * In this case, user can call write() after connect(). + * write() will invoke tcp_sendmsg_fastopen() which calls + * __inet_stream_connect(). + */ + if (uaddr) { + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; - if (uaddr->sa_family == AF_UNSPEC) { - err = sk->sk_prot->disconnect(sk, flags); - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; - goto out; + if (uaddr->sa_family == AF_UNSPEC) { + err = sk->sk_prot->disconnect(sk, flags); + sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; + goto out; + } } switch (sock->state) { @@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -EISCONN; goto out; case SS_CONNECTING: - err = -EALREADY; + if (inet_sk(sk)->defer_connect) + err = -EINPROGRESS; + else + err = -EALREADY; /* Fall out of switch with err, set for this state */ break; case SS_UNCONNECTED: @@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTING; + if (!err && inet_sk(sk)->defer_connect) + goto out; + /* Just entered SS_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c43eb1a831d7..d9735b76d073 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; + } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + /* Active TCP fastopen socket with defer_connect + * Return POLLOUT so application can call write() + * in order for kernel to generate SYN+data + */ + mask |= POLLOUT | POLLWRNORM; } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); @@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *inet = inet_sk(sk); int err, flags; if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) @@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, tp->fastopen_req->data = msg; tp->fastopen_req->size = size; + if (inet->defer_connect) { + err = tcp_connect(sk); + /* Same failure procedure as in tcp_v4/6_connect */ + if (err) { + tcp_set_state(sk, TCP_CLOSE); + inet->inet_dport = 0; + sk->sk_route_caps = 0; + } + } flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); + inet->defer_connect = 0; *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; @@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); flags = msg->msg_flags; - if (flags & MSG_FASTOPEN) { + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; @@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EINVAL; } break; + case TCP_FASTOPEN_CONNECT: + if (val > 1 || val < 0) { + err = -EINVAL; + } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + if (sk->sk_state == TCP_CLOSE) + tp->fastopen_connect = val; + else + err = -EINVAL; + } else { + err = -EOPNOTSUPP; + } + break; case TCP_TIMESTAMP: if (!tp->repair) err = -EPERM; @@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; + case TCP_FASTOPEN_CONNECT: + val = tp->fastopen_connect; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f90e09e1ff4c..9674bec4a0f8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -346,3 +346,36 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, } return cookie->len > 0; } + +/* This function checks if we want to defer sending SYN until the first + * write(). We defer under the following conditions: + * 1. fastopen_connect sockopt is set + * 2. 
we have a valid cookie + * Return value: return true if we want to defer until application writes data + * return false if we want to send out SYN immediately + */ +bool tcp_fastopen_defer_connect(struct sock *sk, int *err) +{ + struct tcp_fastopen_cookie cookie = { .len = 0 }; + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (tp->fastopen_connect && !tp->fastopen_req) { + if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { + inet_sk(sk)->defer_connect = 1; + return true; + } + + /* Alloc fastopen_req in order for FO option to be included + * in SYN + */ + tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), + sk->sk_allocation); + if (tp->fastopen_req) + tp->fastopen_req->cookie = cookie; + else + *err = -ENOBUFS; + } + return false; +} +EXPORT_SYMBOL(tcp_fastopen_defer_connect); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a90b4540c11e..8c9e9aa17d66 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. 
*/ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); + rt = NULL; if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, @@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_id = tp->write_seq ^ jiffies; + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto failure; + err = tcp_connect(sk); - rt = NULL; if (err) goto failure; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0b7cd3d009b6..95c05e5293b1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport, &tp->tsoffset); + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto late_failure; + err = tcp_connect(sk); if (err) goto late_failure; -- cgit v1.2.3 From 3979ad7e82dfe3fb94a51c3915e64ec64afa45c3 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 25 Jan 2017 14:42:46 +0100 Subject: net/tcp-fastopen: make connect()'s return case more consistent with non-TFO Without TFO, any subsequent connect() call after a successful one returns -1 EISCONN. The last API update ensured that __inet_stream_connect() can return -1 EINPROGRESS in response to sendmsg() when TFO is in use to indicate that the connection is now in progress. Unfortunately since this function is used both for connect() and sendmsg(), it has the undesired side effect of making connect() now return -1 EINPROGRESS as well after a successful call, while at the same time poll() returns POLLOUT. This can confuse some applications which happen to call connect() and to check for -1 EISCONN to ensure the connection is usable, and for which EINPROGRESS indicates a need to poll, causing a loop. This problem was encountered in haproxy where a call to connect() is precisely used in certain cases to confirm a connection's readiness. 
While arguably haproxy's behaviour should be improved here, it seems important to aim at a more robust behaviour when the goal of the new API is to make it easier to implement TFO in existing applications. This patch simply ensures that we preserve the same semantics as in the non-TFO case on the connect() syscall when using TFO, while still returning -1 EINPROGRESS on sendmsg(). For this we simply tell __inet_stream_connect() whether we're doing a regular connect() or in fact connecting for a sendmsg() call. Cc: Wei Wang Cc: Yuchung Cheng Cc: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 +- net/ipv4/af_inet.c | 6 +++--- net/ipv4/tcp.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5d683428fced..b7952d55b9c0 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -17,7 +17,7 @@ int inet_release(struct socket *sock); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags); + int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92e7f3e957fa..685ba53df2d1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -570,7 +570,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) * TCP 'magic' in here. 
*/ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) + int addr_len, int flags, int is_sendmsg) { struct sock *sk = sock->sk; int err; @@ -605,7 +605,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; case SS_CONNECTING: if (inet_sk(sk)->defer_connect) - err = -EINPROGRESS; + err = is_sendmsg ? -EINPROGRESS : -EISCONN; else err = -EALREADY; /* Fall out of switch with err, set for this state */ @@ -679,7 +679,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; lock_sock(sock->sk); - err = __inet_stream_connect(sock, uaddr, addr_len, flags); + err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0); release_sock(sock->sk); return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d9735b76d073..2ed472ebf3b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1103,7 +1103,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, - msg->msg_namelen, flags); + msg->msg_namelen, flags, 1); inet->defer_connect = 0; *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); -- cgit v1.2.3 From 434502930f59995f37fcc2c02cab79e059fb5043 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 25 Jan 2017 15:04:17 +0100 Subject: net: dsa: Mop up remaining NET_DSA_HWMON references Previous patches have moved the temperature sensor code into the Marvell PHYs. A few now dead references to NET_DSA_HWMON were left behind. Go reap them. Reported-by: Valentin Rothberg Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- Documentation/networking/dsa/dsa.txt | 24 ------------------------ include/net/dsa.h | 8 -------- 2 files changed, 32 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 63912ef34606..b8b40753133e 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -295,7 +295,6 @@ DSA currently leverages the following subsystems: - MDIO/PHY library: drivers/net/phy/phy.c, mdio_bus.c - Switchdev: net/switchdev/* - Device Tree for various of_* functions -- HWMON: drivers/hwmon/* MDIO/PHY library ---------------- @@ -349,12 +348,6 @@ Documentation/devicetree/bindings/net/dsa/dsa.txt. PHY/MDIO library helper functions such as of_get_phy_mode(), of_phy_connect() are also used to query per-port PHY specific details: interface connection, MDIO bus location etc.. -HWMON ------ - -Some switch drivers feature internal temperature sensors which are exposed as -regular HWMON devices in /sys/class/hwmon/. - Driver development ================== @@ -495,23 +488,6 @@ Power management BR_STATE_DISABLED and propagating changes to the hardware if this port is disabled while being a bridge member -Hardware monitoring -------------------- - -These callbacks are only available if CONFIG_NET_DSA_HWMON is enabled: - -- get_temp: this function queries the given switch for its temperature - -- get_temp_limit: this function returns the switch current maximum temperature - limit - -- set_temp_limit: this function configures the maximum temperature limit allowed - -- get_temp_alarm: this function returns the critical temperature threshold - returning an alarm notification - -See Documentation/hwmon/sysfs-interface for details. 
- Bridge layer ------------ diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d6cd923c48c..08b340403927 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -178,14 +178,6 @@ struct dsa_switch { */ s8 rtable[DSA_MAX_SWITCHES]; -#ifdef CONFIG_NET_DSA_HWMON - /* - * Hardware monitoring information - */ - char hwmon_name[IFNAMSIZ + 8]; - struct device *hwmon_dev; -#endif - /* * The lower device this switch uses to talk to the host */ -- cgit v1.2.3 From 55ed0ce0898e15fec30d2ca2a563d7934b082375 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jan 2017 10:45:51 -0800 Subject: net: dsa: Pass device pointer to dsa_register_switch In preparation for allowing dsa_register_switch() to be supplied with device/platform data, pass down a struct device pointer instead of a struct device_node. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 7 +++---- drivers/net/dsa/qca8k.c | 2 +- include/net/dsa.h | 2 +- net/dsa/dsa2.c | 7 ++++--- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 5cbb14f6a03b..bb210b12ad1b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1894,7 +1894,7 @@ int b53_switch_register(struct b53_device *dev) pr_info("found switch: %s, rev %i\n", dev->name, dev->core_rev); - return dsa_register_switch(dev->ds, dev->ds->dev->of_node); + return dsa_register_switch(dev->ds, dev->ds->dev); } EXPORT_SYMBOL(b53_switch_register); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5668e778ed1d..921e53351786 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4356,8 +4356,7 @@ static struct dsa_switch_driver mv88e6xxx_switch_drv = { .ops = &mv88e6xxx_switch_ops, }; -static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, - 
struct device_node *np) +static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) { struct device *dev = chip->dev; struct dsa_switch *ds; @@ -4372,7 +4371,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, dev_set_drvdata(dev, ds); - return dsa_register_switch(ds, np); + return dsa_register_switch(ds, dev); } static void mv88e6xxx_unregister_switch(struct mv88e6xxx_chip *chip) @@ -4456,7 +4455,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) goto out_g2_irq; - err = mv88e6xxx_register_switch(chip, np); + err = mv88e6xxx_register_switch(chip); if (err) goto out_mdio; diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 54d270d59eb0..c084aa484d2b 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -964,7 +964,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) mutex_init(&priv->reg_mutex); dev_set_drvdata(&mdiodev->dev, priv); - return dsa_register_switch(priv->ds, priv->ds->dev->of_node); + return dsa_register_switch(priv->ds, &mdiodev->dev); } static void diff --git a/include/net/dsa.h b/include/net/dsa.h index 08b340403927..92fd795e9573 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -387,7 +387,7 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) } void dsa_unregister_switch(struct dsa_switch *ds); -int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); +int dsa_register_switch(struct dsa_switch *ds, struct device *dev); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 866222a8f9bf..2cf489c5e90f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -578,8 +578,9 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, return ports; } -static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np) +static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) { + struct 
device_node *np = dev->of_node; struct device_node *ports = dsa_get_ports(ds, np); struct dsa_switch_tree *dst; u32 tree, index; @@ -659,12 +660,12 @@ out: return err; } -int dsa_register_switch(struct dsa_switch *ds, struct device_node *np) +int dsa_register_switch(struct dsa_switch *ds, struct device *dev) { int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds, np); + err = _dsa_register_switch(ds, dev); mutex_unlock(&dsa2_mutex); return err; -- cgit v1.2.3 From d35a00b8e33dab7385f724e713ae71c8be0a49f4 Mon Sep 17 00:00:00 2001 From: Felix Jia Date: Thu, 26 Jan 2017 16:59:17 +1300 Subject: net/ipv6: allow sysctl to change link-local address generation mode The address generation mode for IPv6 link-local can only be configured by netlink messages. This patch adds the ability to change the address generation mode via sysctl. v1 -> v2 Removed the rtnl lock and switched to use RCU lock to iterate through the netdev list. v2 -> v3 Removed the addrgenmode variable from the idev structure and use the sysctl storage for the flag. Simplified the logic for sysctl handling by removing support for the "all" operation. Added support for more types of tunnel interfaces for link-local address generation. Based the patches on net-next. v3 -> v4 Removed unnecessary whitespace changes. Signed-off-by: Felix Jia Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + include/net/if_inet6.h | 1 - include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 104 +++++++++++++++++++++++++++++++++++++--------- 4 files changed, 86 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 671d014e6429..71be5b330d21 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -69,6 +69,7 @@ struct ipv6_devconf { __s32 seg6_require_hmac; #endif __u32 enhanced_dad; + __u32 addr_gen_mode; struct ctl_table_header *sysctl_header; }; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 0fa4c324b713..f656f9051aca 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -205,7 +205,6 @@ struct inet6_dev { __s32 rs_interval; /* in jiffies */ __u8 rs_probes; - __u8 addr_gen_mode; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index eaf65dc82e22..8ef9e75e004e 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -182,6 +182,7 @@ enum { DEVCONF_SEG6_ENABLED, DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_ENHANCED_DAD, + DEVCONF_ADDR_GEN_MODE, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac9bd5620f81..e35259dd17ba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -243,6 +243,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .seg6_require_hmac = 0, #endif .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -294,6 +295,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .seg6_require_hmac = 0, #endif .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, }; /* Check if a valid qdisc is available */ @@ -386,9 +388,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); if 
(ndev->cnf.stable_secret.initialized) - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; else - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode; ndev->cnf.mtu6 = dev->mtu; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -2387,8 +2389,8 @@ static void manage_tempaddrs(struct inet6_dev *idev, static bool is_addr_mode_generate_stable(struct inet6_dev *idev) { - return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; + return idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; } int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, @@ -3152,7 +3154,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - switch (idev->addr_gen_mode) { + switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); /* fallthrough */ @@ -3204,8 +3206,8 @@ static void addrconf_dev_config(struct net_device *dev) /* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) - idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; addrconf_addr_gen(idev, false); } @@ -4982,6 +4984,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; #endif array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; + array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; } static inline size_t inet6_ifla6_size(void) @@ -5093,7 +5096,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, if (!nla) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, 
idev->addr_gen_mode)) + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure; read_lock_bh(&idev->lock); @@ -5211,6 +5214,26 @@ static int inet6_validate_link_af(const struct net_device *dev, return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); } +static int check_addr_gen_mode(int mode) +{ + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + mode != IN6_ADDR_GEN_MODE_RANDOM) + return -EINVAL; + return 1; +} + +static int check_stable_privacy(struct inet6_dev *idev, struct net *net, + int mode) +{ + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !net->ipv6.devconf_dflt->stable_secret.initialized) + return -EINVAL; + return 1; +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@ -5232,18 +5255,11 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); - if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE && - mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - mode != IN6_ADDR_GEN_MODE_RANDOM) - return -EINVAL; - - if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - !idev->cnf.stable_secret.initialized && - !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) + if (check_addr_gen_mode(mode) < 0 || + check_stable_privacy(idev, dev_net(dev), mode) < 0) return -EINVAL; - idev->addr_gen_mode = mode; + idev->cnf.addr_gen_mode = mode; err = 0; } @@ -5652,6 +5668,47 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = 0; + int new_val; + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + struct net *net = (struct net *)ctl->extra2; + + ret = 
proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write) { + new_val = *((int *)ctl->data); + + if (check_addr_gen_mode(new_val) < 0) + return -EINVAL; + + /* request for default */ + if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { + ipv6_devconf_dflt.addr_gen_mode = new_val; + + /* request for individual net device */ + } else { + if (!idev) + return ret; + + if (check_stable_privacy(idev, net, new_val) < 0) + return -EINVAL; + + if (idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; + rtnl_lock(); + addrconf_dev_config(idev->dev); + rtnl_unlock(); + } + } + } + + return ret; +} + static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5702,14 +5759,14 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, struct inet6_dev *idev = __in6_dev_get(dev); if (idev) { - idev->addr_gen_mode = + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; } } } else { struct inet6_dev *idev = ctl->extra1; - idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; } out: @@ -6096,6 +6153,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_addr_gen_mode, + }, { /* sentinel */ } -- cgit v1.2.3 From 158f323b9868b59967ad96957c4ca388161be321 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Jan 2017 07:11:27 -0800 Subject: net: adjust skb->truesize in pskb_expand_head() Slava Shwartsman reported a warning in skb_try_coalesce(), when we detect skb->truesize is completely wrong. 
In his case, issue came from IPv6 reassembly coping with malicious datagrams, that forced various pskb_may_pull() to reallocate a bigger skb->head than the one allocated by NIC driver before entering GRO layer. Current code does not change skb->truesize, leaving this burden to callers if they care enough. Blindly changing skb->truesize in pskb_expand_head() is not easy, as some producers might track skb->truesize, for example in xmit path for back pressure feedback (sk->sk_wmem_alloc) We can detect the cases where it should be safe to change skb->truesize : 1) skb is not attached to a socket. 2) If it is attached to a socket, destructor is sock_edemux() My audit gave only two callers doing their own skb->truesize manipulation. I had to remove skb parameter in sock_edemux macro when CONFIG_INET is not set to avoid a compile error. Signed-off-by: Eric Dumazet Reported-by: Slava Shwartsman Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/core/skbuff.c | 14 +++++++++++--- net/netlink/af_netlink.c | 8 +++----- net/wireless/util.c | 2 -- 4 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 7144750d14e5..94e65fd70354 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1534,7 +1534,7 @@ void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); #else -#define sock_edemux(skb) sock_efree(skb) +#define sock_edemux sock_efree #endif int sock_setsockopt(struct socket *sock, int level, int op, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f8dbe4a7ab46..26c1344cc23e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1192,10 +1192,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone); int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { - int i; - u8 *data; - int size = nhead + skb_end_offset(skb) + ntail; + int i, osize = skb_end_offset(skb); + int size = osize + nhead + ntail; long off; + u8 *data; 
BUG_ON(nhead < 0); @@ -1257,6 +1257,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + + /* It is not generally safe to change skb->truesize. + * For the moment, we really care of rx path, or + * when skb is orphaned (not attached to a socket). + */ + if (!skb->sk || skb->destructor == sock_edemux) + skb->truesize += size - osize; + return 0; nofrags: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index edcc1e19ad53..7b73c7c161a9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1210,11 +1210,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) skb = nskb; } - if (!pskb_expand_head(skb, 0, -delta, - (allocation & ~__GFP_DIRECT_RECLAIM) | - __GFP_NOWARN | __GFP_NORETRY)) - skb->truesize -= delta; - + pskb_expand_head(skb, 0, -delta, + (allocation & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOWARN | __GFP_NORETRY); return skb; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 1b9296882dcd..68e5f2ecee1a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -618,8 +618,6 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) return -ENOMEM; - - skb->truesize += head_need; } if (encaps_data) { -- cgit v1.2.3 From a0c02161ecfc2f40a0837926efac5376bc6fd6d3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Jan 2017 15:29:36 -0500 Subject: net: dsa: variable number of ports Change the ports[DSA_MAX_PORTS] array of the dsa_switch structure for a zero-length array, allocated at the same time as the dsa_switch structure itself. A dsa_switch_alloc() helper is provided for that. This commit brings no functional change yet since we pass DSA_MAX_PORTS as the number of ports for the moment. Future patches can update the DSA drivers separately to support dynamic number of ports. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 7 ++++--- drivers/net/dsa/mv88e6xxx/chip.c | 3 +-- drivers/net/dsa/qca8k.c | 3 +-- include/net/dsa.h | 6 +++++- net/dsa/dsa.c | 5 ++--- net/dsa/dsa2.c | 16 ++++++++++++++++ 6 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bb210b12ad1b..31afc4d4b68b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1790,14 +1790,15 @@ struct b53_device *b53_switch_alloc(struct device *base, struct dsa_switch *ds; struct b53_device *dev; - ds = devm_kzalloc(base, sizeof(*ds) + sizeof(*dev), GFP_KERNEL); + ds = dsa_switch_alloc(base, DSA_MAX_PORTS); if (!ds) return NULL; - dev = (struct b53_device *)(ds + 1); + dev = devm_kzalloc(base, sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; ds->priv = dev; - ds->dev = base; dev->dev = base; dev->ds = ds; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 921e53351786..cb7b24748336 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4361,11 +4361,10 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) struct device *dev = chip->dev; struct dsa_switch *ds; - ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL); + ds = dsa_switch_alloc(dev, DSA_MAX_PORTS); if (!ds) return -ENOMEM; - ds->dev = dev; ds->priv = chip; ds->ops = &mv88e6xxx_switch_ops; diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index c084aa484d2b..f67c6a3cebff 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -954,12 +954,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL); + priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); if (!priv->ds) return -ENOMEM; priv->ds->priv = priv; - priv->ds->dev = &mdiodev->dev; priv->ds->ops = 
&qca8k_switch_ops; mutex_init(&priv->reg_mutex); dev_set_drvdata(&mdiodev->dev, priv); diff --git a/include/net/dsa.h b/include/net/dsa.h index 92fd795e9573..24e1d935ae68 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -190,8 +190,11 @@ struct dsa_switch { u32 cpu_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; - struct dsa_port ports[DSA_MAX_PORTS]; struct mii_bus *slave_mii_bus; + + /* Dynamically allocated ports, keep last */ + size_t num_ports; + struct dsa_port ports[]; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) @@ -386,6 +389,7 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) return dst->rcv != NULL; } +struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds, struct device *dev); #ifdef CONFIG_PM_SLEEP diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 07e863369e04..de3ffb421ee4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -347,8 +347,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Allocate and initialise switch state. 
*/ - ds = devm_kzalloc(parent, sizeof(*ds), GFP_KERNEL); - if (ds == NULL) + ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); + if (!ds) return ERR_PTR(-ENOMEM); ds->dst = dst; @@ -356,7 +356,6 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->cd = cd; ds->ops = ops; ds->priv = priv; - ds->dev = parent; ret = dsa_switch_setup_one(ds, parent); if (ret) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 75f5d1f8554b..4b3a44bec5c8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -666,6 +666,22 @@ out: return err; } +struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) +{ + size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); + struct dsa_switch *ds; + + ds = devm_kzalloc(dev, size, GFP_KERNEL); + if (!ds) + return NULL; + + ds->dev = dev; + ds->num_ports = n; + + return ds; +} +EXPORT_SYMBOL_GPL(dsa_switch_alloc); + int dsa_register_switch(struct dsa_switch *ds, struct device *dev) { int err; -- cgit v1.2.3 From 818be8489d6fc8f4cc2c7699bbfd8e1983080f10 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Jan 2017 15:29:38 -0500 Subject: net: dsa: add ds and index to dsa_port Add the physical switch instance and port index a DSA port belongs to to the dsa_port structure. That can be used later to retrieve information about a physical port when configuring a switch fabric, or lighten up struct dsa_slave_priv. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 2 ++ net/dsa/dsa2.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 24e1d935ae68..6bd1f8b05dbd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -140,6 +140,8 @@ struct dsa_switch_tree { }; struct dsa_port { + struct dsa_switch *ds; + unsigned int index; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6e7b3e88b778..9f8cc26be9ea 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -670,6 +670,7 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) { size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); struct dsa_switch *ds; + int i; ds = devm_kzalloc(dev, size, GFP_KERNEL); if (!ds) @@ -678,6 +679,11 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) ds->dev = dev; ds->num_ports = n; + for (i = 0; i < ds->num_ports; ++i) { + ds->ports[i].index = i; + ds->ports[i].ds = ds; + } + return ds; } EXPORT_SYMBOL_GPL(dsa_switch_alloc); -- cgit v1.2.3 From a5e9a02e1f182237ef44eb3919cf4dd45ed4db9b Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Jan 2017 15:29:40 -0500 Subject: net: dsa: move bridge device in dsa_port Move the bridge_dev pointer from dsa_slave_priv to dsa_port so that DSA drivers can access this information and remove the need to cache it. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- include/net/dsa.h | 1 + net/dsa/dsa_priv.h | 1 - net/dsa/slave.c | 10 +++++----- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6bd1f8b05dbd..924533fd4425 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -146,6 +146,7 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; u8 stp_state; + struct net_device *bridge_dev; }; struct dsa_switch { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index c519bd0e9206..3022f2e42cdc 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -38,7 +38,6 @@ struct dsa_slave_priv { int old_pause; int old_duplex; - struct net_device *bridge_dev; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a2f0267c4a3c..cdb1df87e111 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -64,9 +64,9 @@ static int dsa_slave_get_iflink(const struct net_device *dev) return p->dp->ds->dst->master_netdev->ifindex; } -static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) +static inline bool dsa_port_is_bridged(struct dsa_port *dp) { - return !!p->bridge_dev; + return !!dp->bridge_dev; } static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) @@ -98,7 +98,7 @@ static int dsa_slave_open(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = p->dp->ds->dst->master_netdev; struct dsa_switch *ds = p->dp->ds; - u8 stp_state = dsa_port_is_bridged(p) ? + u8 stp_state = dsa_port_is_bridged(p->dp) ? 
BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; @@ -557,7 +557,7 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, struct dsa_switch *ds = p->dp->ds; int ret = -EOPNOTSUPP; - p->bridge_dev = br; + p->dp->bridge_dev = br; if (ds->ops->port_bridge_join) ret = ds->ops->port_bridge_join(ds, p->dp->index, br); @@ -574,7 +574,7 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) if (ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, p->dp->index); - p->bridge_dev = NULL; + p->dp->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional -- cgit v1.2.3 From f123f2fbedc7c2723ceb050cd88c2ea1d6a8be32 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 27 Jan 2017 15:29:41 -0500 Subject: net: dsa: pass bridge device when a port leaves Upon reception of the NETDEV_CHANGEUPPER, a leaving port is already unbridged, so reflect this by assigning the port's bridge_dev pointer to NULL before calling the port_bridge_leave DSA driver operation. Now that the bridge_dev pointer is exposed to the drivers, reflecting the current state of the DSA switch fabric is necessary for the drivers to adjust their port based VLANs correctly. Pass the bridge device pointer to the port_bridge_leave operation so that drivers have all information to re-program their chips properly, and do not need to cache it anymore. Signed-off-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- drivers/net/dsa/b53/b53_priv.h | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 3 ++- drivers/net/dsa/qca8k.c | 2 +- include/net/dsa.h | 3 ++- net/dsa/slave.c | 10 +++++----- 6 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 31afc4d4b68b..32fdcf5570c8 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1354,7 +1354,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) } EXPORT_SYMBOL(b53_br_join); -void b53_br_leave(struct dsa_switch *ds, int port) +void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct b53_device *dev = ds->priv; struct net_device *bridge = dev->ports[port].bridge_dev; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index a8031b382c55..5dafb70e75fc 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -382,7 +382,7 @@ void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data); void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds); int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge); -void b53_br_leave(struct dsa_switch *ds, int port); +void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge); void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state); void b53_br_fast_age(struct dsa_switch *ds, int port); int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index cb7b24748336..8eb0dc063f4e 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2343,7 +2343,8 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, return err; } -static void 
mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) +static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, + struct net_device *br) { struct mv88e6xxx_chip *chip = ds->priv; struct net_device *bridge = chip->ports[port].bridge_dev; diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index f67c6a3cebff..c85b187aa3d9 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -775,7 +775,7 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, } static void -qca8k_port_bridge_leave(struct dsa_switch *ds, int port) +qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; int i; diff --git a/include/net/dsa.h b/include/net/dsa.h index 924533fd4425..b951e2ebda75 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -325,7 +325,8 @@ struct dsa_switch_ops { int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); - void (*port_bridge_leave)(struct dsa_switch *ds, int port); + void (*port_bridge_leave)(struct dsa_switch *ds, int port, + struct net_device *bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cdb1df87e111..08725286f79d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -565,16 +565,16 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, return ret == -EOPNOTSUPP ? 
0 : ret; } -static void dsa_slave_bridge_port_leave(struct net_device *dev) +static void dsa_slave_bridge_port_leave(struct net_device *dev, + struct net_device *br) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; + p->dp->bridge_dev = NULL; if (ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, p->dp->index); - - p->dp->bridge_dev = NULL; + ds->ops->port_bridge_leave(ds, p->dp->index, br); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional @@ -1343,7 +1343,7 @@ static int dsa_slave_port_upper_event(struct net_device *dev, if (info->linking) err = dsa_slave_bridge_port_join(dev, upper); else - dsa_slave_bridge_port_leave(dev); + dsa_slave_bridge_port_leave(dev, upper); } break; -- cgit v1.2.3 From bf9f26485d232916cdc2257e42831781e1f075e8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Jan 2017 09:48:40 -0800 Subject: net: dsa: Hook {get,set}_rxnfc ethtool operations In preparation for adding support for CFP/TCAMP in the bcm_sf2 driver add the plumbing to call into driver specific {get,set}_rxnfc operations. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/net/dsa.h | 8 ++++++++ net/dsa/slave.c | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index b951e2ebda75..d5d618c3de64 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -377,6 +377,14 @@ struct dsa_switch_ops { int (*port_mdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, int (*cb)(struct switchdev_obj *obj)); + + /* + * RXNFC + */ + int (*get_rxnfc)(struct dsa_switch *ds, int port, + struct ethtool_rxnfc *nfc, u32 *rule_locs); + int (*set_rxnfc)(struct dsa_switch *ds, int port, + struct ethtool_rxnfc *nfc); }; struct dsa_switch_driver { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 08725286f79d..6881889e1a9b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1002,6 +1002,30 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) ops->get_strings = dsa_cpu_port_get_strings; } +static int dsa_slave_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rule_locs) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->get_rxnfc) + return -EOPNOTSUPP; + + return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs); +} + +static int dsa_slave_set_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->set_rxnfc) + return -EOPNOTSUPP; + + return ds->ops->set_rxnfc(ds, p->dp->index, nfc); +} + static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, @@ -1020,6 +1044,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_eee = dsa_slave_get_eee, .get_link_ksettings = dsa_slave_get_link_ksettings, .set_link_ksettings = dsa_slave_set_link_ksettings, + .get_rxnfc = dsa_slave_get_rxnfc, + .set_rxnfc = dsa_slave_set_rxnfc, }; static const struct 
net_device_ops dsa_slave_netdev_ops = { -- cgit v1.2.3 From 63a6fff353d01da5a22b72670c434bf12fa0e3b8 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 26 Jan 2017 18:02:24 +0000 Subject: net: Avoid receiving packets with an l3mdev on unbound UDP sockets Packets arriving in a VRF currently are delivered to UDP sockets that aren't bound to any interface. TCP defaults to not delivering packets arriving in a VRF to unbound sockets. IP route lookup and socket transmit both assume that unbound means using the default table and UDP applications that haven't been changed to be aware of VRFs may not function correctly in this case since they may not be able to handle overlapping IP address ranges, or be able to send packets back to the original sender if required. So add a sysctl, udp_l3mdev_accept, to control this behaviour with it being analogous to the existing tcp_l3mdev_accept, namely to allow a process to have a VRF-global listen socket. Have this default to off as this is the behaviour that users will expect, given that there is no explicit mechanism to set unmodified VRF-unaware applications into a default VRF. Signed-off-by: Robert Shearman Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ Documentation/networking/vrf.txt | 7 ++++--- include/net/netns/ipv4.h | 4 ++++ net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/udp.c | 27 ++++++++++++++++++++------- net/ipv6/udp.c | 27 ++++++++++++++++++++------- 6 files changed, 66 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17f2e7791042..fc73eeb7b3b8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -721,6 +721,13 @@ tcp_challenge_ack_limit - INTEGER UDP variables: +udp_l3mdev_accept - BOOLEAN + Enabling this option allows a "global" bound socket to work + across L3 master domains (e.g., VRFs) with packets capable of + being received regardless of the L3 domain in which they + originated. Only valid when the kernel was compiled with + CONFIG_NET_L3_MASTER_DEV. + udp_mem - vector of 3 INTEGERs: min, pressure, max Number of pages allowed for queueing by all UDP sockets. diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index 755dab856392..3918dae964d4 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt @@ -98,10 +98,11 @@ VRF device: or to specify the output device using cmsg and IP_PKTINFO. -TCP services running in the default VRF context (ie., not bound to any VRF -device) can work across all VRF domains by enabling the tcp_l3mdev_accept -sysctl option: +TCP & UDP services running in the default VRF context (ie., not bound +to any VRF device) can work across all VRF domains by enabling the +tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: sysctl -w net.ipv4.tcp_l3mdev_accept=1 + sysctl -w net.ipv4.udp_l3mdev_accept=1 netfilter rules on the VRF device can be used to limit access to services running in the default VRF context as well. 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e365732b8051..622d2da27135 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -124,6 +124,10 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_udp_l3mdev_accept; +#endif + int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; int sysctl_igmp_llm_reports; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b861997fdc5..d6880a6149ee 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1012,6 +1012,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_privileged_ports, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "udp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { } }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d6dddcf59e79..cf6ba3387401 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -134,6 +134,17 @@ EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) +/* IPCB reference means this can not be used from early demux */ +static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_udp_l3mdev_accept && + skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) + return true; +#endif + return false; +} + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -369,7 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, int dif) + __be32 daddr, unsigned short hnum, int dif, + bool exact_dif) { int score; 
struct inet_sock *inet; @@ -400,7 +412,7 @@ static int compute_score(struct sock *sk, struct net *net, score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -425,7 +437,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif, + __be32 daddr, unsigned int hnum, int dif, bool exact_dif, struct udp_hslot *hslot2, struct sk_buff *skb) { @@ -437,7 +449,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -472,6 +484,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + bool exact_dif = udp_lib_exact_dif_match(net, skb); int score, badness, matches = 0, reuseport = 0; u32 hash = 0; @@ -484,7 +497,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); @@ -499,7 +512,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, skb); } return result; } @@ -508,7 +521,7 @@ begin: badness = 0; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; 
if (reuseport) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 05d69324862e..b4c6516a3a0c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -55,6 +55,16 @@ #include #include "udp_impl.h" +static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_udp_l3mdev_accept && + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -118,7 +128,7 @@ static void udp_v6_rehash(struct sock *sk) static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, - int dif) + int dif, bool exact_dif) { int score; struct inet_sock *inet; @@ -149,7 +159,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -165,7 +175,7 @@ static int compute_score(struct sock *sk, struct net *net, static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, + bool exact_dif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -176,7 +186,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -212,6 +222,7 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + bool exact_dif = udp6_lib_exact_dif_match(net, skb); 
int score, badness, matches = 0, reuseport = 0; u32 hash = 0; @@ -223,7 +234,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -239,7 +250,8 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, + skb); } return result; } @@ -247,7 +259,8 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, + exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { -- cgit v1.2.3 From 30357d7d8aaf2a980ab17c2ce054b2b87e60af88 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 30 Jan 2017 12:07:37 -0800 Subject: lwtunnel: remove device arg to lwtunnel_build_state Nothing about lwt state requires a device reference, so remove the input argument. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/lwtunnel.h | 6 +++--- net/core/lwt_bpf.c | 2 +- net/core/lwtunnel.c | 4 ++-- net/ipv4/fib_semantics.c | 27 ++++++++------------------- net/ipv4/ip_tunnel_core.c | 4 ++-- net/ipv6/ila/ila_lwt.c | 2 +- net/ipv6/route.c | 2 +- net/ipv6/seg6_iptunnel.c | 2 +- net/mpls/mpls_iptunnel.c | 2 +- 9 files changed, 20 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 73dd87647460..45399ed132bf 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -33,7 +33,7 @@ struct lwtunnel_state { }; struct lwtunnel_encap_ops { - int (*build_state)(struct net_device *dev, struct nlattr *encap, + int (*build_state)(struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts); void (*destroy_state)(struct lwtunnel_state *lws); @@ -109,7 +109,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); -int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws); @@ -181,7 +181,7 @@ static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) return -EOPNOTSUPP; } -static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +static inline int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 03600459bcfd..0cfe7b0216c3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -237,7 +237,7 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 }, }; -static int bpf_build_state(struct net_device *dev, struct nlattr *nla, +static int bpf_build_state(struct 
nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index c23465005f2f..6df9f8fabf0c 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -101,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, } EXPORT_SYMBOL(lwtunnel_encap_del_ops); -int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws) { @@ -116,7 +116,7 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); if (likely(ops && ops->build_state && try_module_get(ops->owner))) { - ret = ops->build_state(dev, encap, family, cfg, lws); + ret = ops->build_state(encap, family, cfg, lws); if (ret) module_put(ops->owner); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 319c66de92eb..6306a67880e8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -471,7 +471,6 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg) { - struct net *net = cfg->fc_nlinfo.nl_net; int ret; change_nexthops(fi) { @@ -503,16 +502,14 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla = nla_find(attrs, attrlen, RTA_ENCAP); if (nla) { struct lwtunnel_state *lwtstate; - struct net_device *dev = NULL; struct nlattr *nla_entype; nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (!nla_entype) goto err_inval; - if (cfg->fc_oif) - dev = __dev_get_by_index(net, cfg->fc_oif); - ret = lwtunnel_build_state(dev, nla_get_u16( + + ret = lwtunnel_build_state(nla_get_u16( nla_entype), nla, AF_INET, cfg, &lwtstate); @@ -597,21 +594,18 @@ static inline void fib_add_weight(struct fib_info *fi, #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 
-static int fib_encap_match(struct net *net, u16 encap_type, +static int fib_encap_match(u16 encap_type, struct nlattr *encap, - int oif, const struct fib_nh *nh, + const struct fib_nh *nh, const struct fib_config *cfg) { struct lwtunnel_state *lwtstate; - struct net_device *dev = NULL; int ret, result = 0; if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - if (oif) - dev = __dev_get_by_index(net, oif); - ret = lwtunnel_build_state(dev, encap_type, encap, + ret = lwtunnel_build_state(encap_type, encap, AF_INET, cfg, &lwtstate); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); @@ -623,7 +617,6 @@ static int fib_encap_match(struct net *net, u16 encap_type, int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { - struct net *net = cfg->fc_nlinfo.nl_net; #ifdef CONFIG_IP_ROUTE_MULTIPATH struct rtnexthop *rtnh; int remaining; @@ -634,9 +627,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) if (cfg->fc_oif || cfg->fc_gw) { if (cfg->fc_encap) { - if (fib_encap_match(net, cfg->fc_encap_type, - cfg->fc_encap, cfg->fc_oif, - fi->fib_nh, cfg)) + if (fib_encap_match(cfg->fc_encap_type, + cfg->fc_encap, fi->fib_nh, cfg)) return 1; } if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && @@ -1093,13 +1085,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; - struct net_device *dev = NULL; if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) goto err_inval; - if (cfg->fc_oif) - dev = __dev_get_by_index(net, cfg->fc_oif); - err = lwtunnel_build_state(dev, cfg->fc_encap_type, + err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET, cfg, &lwtstate); if (err) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9d6c10096d44..a31f47ccaad9 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -226,7 +226,7 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, }; 
-static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, +static int ip_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { @@ -323,7 +323,7 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 }, }; -static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, +static int ip6_tun_build_state(struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 13b5e85fe0d5..ce1aae4a7fc8 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -115,7 +115,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; -static int ila_build_state(struct net_device *dev, struct nlattr *nla, +static int ila_build_state(struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 61d7006324ed..2563331b0532 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1897,7 +1897,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; - err = lwtunnel_build_state(dev, cfg->fc_encap_type, + err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET6, cfg, &lwtstate); if (err) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index c46f8cbf5ab5..6124e159c882 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -303,7 +303,7 @@ drop: return err; } -static int seg6_build_state(struct net_device *dev, struct nlattr *nla, +static int seg6_build_state(struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 67b7a955de65..e4e4424f9eb1 100644 --- a/net/mpls/mpls_iptunnel.c +++ 
b/net/mpls/mpls_iptunnel.c @@ -133,7 +133,7 @@ drop: return -EINVAL; } -static int mpls_build_state(struct net_device *dev, struct nlattr *nla, +static int mpls_build_state(struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { -- cgit v1.2.3 From f50f212749e8a28803af3628acbeb85ee0458ed5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Jan 2017 12:41:40 -0800 Subject: net: dsa: Add plumbing for port mirroring Add necessary plumbing at the slave network device level to have switch drivers implement ndo_setup_tc() and most particularly the cls_matchall classifier. We add support for two switch operations: port_mirror_add() and port_mirror_del(), which configure, on a per-port basis, the mirror parameters requested from the cls_matchall classifier. Code is largely borrowed from the Mellanox Spectrum switch driver. Reviewed-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 33 +++++++++++++ net/dsa/dsa_priv.h | 3 ++ net/dsa/slave.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 172 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index d5d618c3de64..2cb77e64d648 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -20,6 +20,8 @@ #include #include +struct tc_action; + enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, DSA_TAG_PROTO_DSA, @@ -139,6 +141,28 @@ struct dsa_switch_tree { const struct dsa_device_ops *tag_ops; }; +/* TC matchall action types, only mirroring for now */ +enum dsa_port_mall_action_type { + DSA_PORT_MALL_MIRROR, +}; + +/* TC mirroring entry */ +struct dsa_mall_mirror_tc_entry { + u8 to_local_port; + bool ingress; +}; + +/* TC matchall entry */ +struct dsa_mall_tc_entry { + struct list_head list; + unsigned long cookie; + enum dsa_port_mall_action_type type; + union { + struct dsa_mall_mirror_tc_entry mirror; + }; +}; + + struct dsa_port { struct dsa_switch *ds; 
unsigned int index; @@ -385,6 +409,15 @@ struct dsa_switch_ops { struct ethtool_rxnfc *nfc, u32 *rule_locs); int (*set_rxnfc)(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc); + + /* + * TC integration + */ + int (*port_mirror_add)(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress); + void (*port_mirror_del)(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror); }; struct dsa_switch_driver { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 3022f2e42cdc..a5509b765fc0 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -41,6 +41,9 @@ struct dsa_slave_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif + + /* TC context */ + struct list_head mall_tc_list; }; /* dsa.c */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6881889e1a9b..09fc3e9462c1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -16,12 +16,17 @@ #include #include #include +#include #include #include +#include +#include #include #include #include "dsa_priv.h" +static bool dsa_slave_dev_check(struct net_device *dev); + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -995,6 +1000,133 @@ static int dsa_slave_get_phys_port_name(struct net_device *dev, return 0; } +static struct dsa_mall_tc_entry * +dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, + unsigned long cookie) +{ + struct dsa_mall_tc_entry *mall_tc_entry; + + list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) + if (mall_tc_entry->cookie == cookie) + return mall_tc_entry; + + return NULL; +} + +static int dsa_slave_add_cls_matchall(struct net_device *dev, + __be16 protocol, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = p->dp->ds; + struct net *net = dev_net(dev); + struct dsa_slave_priv 
*to_p; + struct net_device *to_dev; + const struct tc_action *a; + int err = -EOPNOTSUPP; + LIST_HEAD(actions); + int ifindex; + + if (!ds->ops->port_mirror_add) + return err; + + if (!tc_single_action(cls->exts)) + return err; + + tcf_exts_to_list(cls->exts, &actions); + a = list_first_entry(&actions, struct tc_action, list); + + if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { + struct dsa_mall_mirror_tc_entry *mirror; + + ifindex = tcf_mirred_ifindex(a); + to_dev = __dev_get_by_index(net, ifindex); + if (!to_dev) + return -EINVAL; + + if (!dsa_slave_dev_check(to_dev)) + return -EOPNOTSUPP; + + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = DSA_PORT_MALL_MIRROR; + mirror = &mall_tc_entry->mirror; + + to_p = netdev_priv(to_dev); + + mirror->to_local_port = to_p->dp->index; + mirror->ingress = ingress; + + err = ds->ops->port_mirror_add(ds, p->dp->index, mirror, + ingress); + if (err) { + kfree(mall_tc_entry); + return err; + } + + list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + } + + return 0; +} + +static void dsa_slave_del_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->port_mirror_del) + return; + + mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); + if (!mall_tc_entry) + return; + + list_del(&mall_tc_entry->list); + + switch (mall_tc_entry->type) { + case DSA_PORT_MALL_MIRROR: + ds->ops->port_mirror_del(ds, p->dp->index, + &mall_tc_entry->mirror); + break; + default: + WARN_ON(1); + } + + kfree(mall_tc_entry); +} + +static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, + __be16 protocol, struct tc_to_netdev *tc) +{ + bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + int ret = -EOPNOTSUPP; + + switch 
(tc->type) { + case TC_SETUP_MATCHALL: + switch (tc->cls_mall->command) { + case TC_CLSMATCHALL_REPLACE: + return dsa_slave_add_cls_matchall(dev, protocol, + tc->cls_mall, + ingress); + case TC_CLSMATCHALL_DESTROY: + dsa_slave_del_cls_matchall(dev, tc->cls_mall); + return 0; + } + default: + break; + } + + return ret; +} + void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) { ops->get_sset_count = dsa_cpu_port_get_sset_count; @@ -1069,6 +1201,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, + .ndo_setup_tc = dsa_slave_setup_tc, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { @@ -1285,7 +1418,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features; + slave_dev->features = master->vlan_features | NETIF_F_HW_TC; + slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; @@ -1304,6 +1438,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, p = netdev_priv(slave_dev); p->dp = &ds->ports[port]; + INIT_LIST_HEAD(&p->mall_tc_list); p->xmit = dst->tag_ops->xmit; p->old_pause = -1; -- cgit v1.2.3 From 11df4b760f11ca7528c62b1c4b870735d1c62116 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:53 +0100 Subject: netfilter: conntrack: no need to pass ctinfo to error handler It is never accessed for reading and the only places that write to it are the icmp(6) handlers, which also set skb->nfct (and skb->nfctinfo). The conntrack core specifically checks for attached skb->nfct after ->error() invocation and returns early in this case. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 12 ++++++------ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 ++++++------ net/netfilter/nf_conntrack_core.c | 3 +-- net/netfilter/nf_conntrack_proto_dccp.c | 1 - net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 1 - net/netfilter/nf_conntrack_proto_udp.c | 3 +-- 8 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e7b836590f0b..85e993e278d5 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -55,7 +55,7 @@ struct nf_conntrack_l4proto { void (*destroy)(struct nf_conn *ct); int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. 
Return like seq_* */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d075b3cf2400..566afac98a88 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -128,13 +128,13 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { @@ -169,11 +169,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, } if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + skb->nfctinfo = ctinfo; return NF_ACCEPT; } @@ -181,7 +181,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, static int icmp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -225,7 +225,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); + 
return icmp_error_message(net, tmpl, skb, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f5a61bc3ec2b..44b9af3f813e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -145,12 +145,12 @@ static int icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -176,7 +176,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), &intuple); @@ -185,19 +185,19 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; } /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + skb->nfctinfo = ctinfo; return NF_ACCEPT; } static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; @@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c index 3a073cd9fcf4..86186a2e2715 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1326,8 +1326,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, - pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b68ce6ac13b3..93dd1c5b7bff 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,7 +561,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, static int dccp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 44a647418948..33279aab583d 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -508,7 +508,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, } static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u8 pf, unsigned int hooknum) { const struct sctphdr *sh; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 69f687740c76..b122e9dacfed 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -750,7 +750,6 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| static int tcp_error(struct net 
*net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae63944c9dc4..f6ebce6178ca 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -112,7 +112,6 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u8 pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -162,7 +161,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, #endif static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum) { -- cgit v1.2.3 From 97a6ad13decc16c5adbf181283932daba7e17faf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:55 +0100 Subject: netfilter: reduce direct skb->nfct usage Next patch makes direct skb->nfct access illegal, reduce noise in next patch by using accessors we already have. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 9 ++++++--- net/netfilter/nf_conntrack_core.c | 15 +++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd6018a9ee24..2a344ebd7ebe 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1554,10 +1554,13 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; + struct nf_conn *untracked; + + nf_conntrack_put(&ct->ct_general); + untracked = nf_ct_untracked_get(); + nf_conntrack_get(&untracked->ct_general); + skb->nfct = &untracked->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); } #endif } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 86186a2e2715..adb7af3a4c4c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -686,8 +686,11 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, !nfct_nat(ct) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); - nf_conntrack_put(skb->nfct); + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); /* Assign conntrack already in hashes to this skbuff. Don't * modify skb->nfctinfo to ensure consistent stateful filtering. 
*/ @@ -1288,7 +1291,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct, *tmpl = NULL; + struct nf_conn *ct, *tmpl; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -1298,9 +1301,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - if (skb->nfct) { + tmpl = nf_ct_get(skb, &ctinfo); + if (tmpl) { /* Previously seen (loopback or untracked)? Ignore. */ - tmpl = (struct nf_conn *)skb->nfct; if (!nf_ct_is_template(tmpl)) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; @@ -1364,7 +1367,7 @@ repeat: /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(skb->nfct); + nf_conntrack_put(&ct->ct_general); skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) -- cgit v1.2.3 From cb9c68363efb6d1f950ec55fb06e031ee70db5fc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:56 +0100 Subject: skbuff: add and use skb_nfct helper Followup patch renames skb->nfct and changes its type so add a helper to avoid intrusive rename change later. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 13 ++++++++++--- include/net/netfilter/nf_conntrack_core.h | 2 +- net/core/skbuff.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 8 ++++---- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 4 ++-- net/ipv4/netfilter/nf_dup_ipv4.c | 2 +- net/ipv6/netfilter/ip6t_SYNPROXY.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 4 ++-- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_nat_helper.c | 2 +- net/netfilter/xt_CT.c | 2 +- net/openvswitch/conntrack.c | 6 +++--- net/sched/cls_flow.c | 2 +- 15 files changed, 36 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..276431e047af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3553,6 +3553,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, skb->csum = csum_add(skb->csum, delta); } +static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return skb->nfct; +#else + return NULL; +#endif +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -3652,9 +3661,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_XFRM) !skb->sp && #endif -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - !skb->nfct && -#endif + !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..84ec7ca5f195 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -62,7 +62,7 @@ int __nf_conntrack_confirm(struct sk_buff *skb); /* 
Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int nf_conntrack_confirm(struct sk_buff *skb) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; if (ct && !nf_ct_is_untracked(ct)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a03730fbc1a..cac3ebfb4b45 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -655,7 +655,7 @@ static void skb_release_head_state(struct sk_buff *skb) skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); + nf_conntrack_put(skb_nfct(skb)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 30c0de53e254..a12d4f0aa674 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -107,8 +107,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -230,8 +230,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 566afac98a88..478a025909fc 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -137,7 +137,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + 
NF_CT_ASSERT(!skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 49bd6a54404f..346bf7ccac08 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -75,7 +75,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv, #if !IS_ENABLED(CONFIG_NF_NAT) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif #endif diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index a981ef7151ca..1a5e1f53ceaa 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -71,7 +71,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, nf_reset(skb); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 98c8dd38575a..2dc01d2c6ec0 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -121,8 +121,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, 
nth, tcp_hdr_size); } static void @@ -244,8 +244,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 44b9af3f813e..09f1661a4e88 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, @@ -224,7 +224,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, noct_valid_new[type]) { skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 8e0bdd058787..ada60d1a991b 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -37,7 +37,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -61,7 +61,7 @@ static unsigned int ipv6_defrag(void *priv, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? 
*/ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index adb7af3a4c4c..78aebf0ee6e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1357,7 +1357,7 @@ repeat: goto out; } - NF_CT_ASSERT(skb->nfct); + NF_CT_ASSERT(skb_nfct(skb)); /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); @@ -1528,7 +1528,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) /* Attach to new skbuff, and increment count */ nskb->nfct = &ct->ct_general; nskb->nfctinfo = ctinfo; - nf_conntrack_get(nskb->nfct); + nf_conntrack_get(skb_nfct(nskb)); } /* Bring out ya dead! */ diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 2840abb5bb99..211661cb2c90 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -60,7 +60,7 @@ static void mangle_contents(struct sk_buff *skb, __skb_trim(skb, skb->len + rep_len - match_len); } - if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + if (nf_ct_l3num((struct nf_conn *)skb_nfct(skb)) == NFPROTO_IPV4) { /* fix IP hdr checksum information */ ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 26b0bccfa0c5..cd7e29910ae1 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -415,7 +415,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); return XT_CONTINUE; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 6b78bab27755..452557946147 100644 --- a/net/openvswitch/conntrack.c +++ 
b/net/openvswitch/conntrack.c @@ -721,8 +721,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, /* Associate skb with specified zone. */ if (tmpl) { - if (skb->nfct) - nf_conntrack_put(skb->nfct); + if (skb_nfct(skb)) + nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); skb->nfct = &tmpl->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -819,7 +819,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, if (err) return err; - ct = (struct nf_conn *)skb->nfct; + ct = (struct nf_conn *)skb_nfct(skb); if (ct) nf_ct_deliver_cached_events(ct); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6575aba87630..3d6b9286c203 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return addr_fold(skb->nfct); + return addr_fold(skb_nfct(skb)); #else return 0; #endif -- cgit v1.2.3 From c74454fadd5ea6fc866ffe2c417a0dba56b2bf1c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:57 +0100 Subject: netfilter: add and use nf_ct_set helper Add a helper to assign a nf_conn entry and the ctinfo bits to an sk_buff. This avoids changing code in followup patch that merges skb->nfct and skb->nfctinfo into skb->_nfct. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 +-- include/net/netfilter/nf_conntrack.h | 8 ++++++++ net/ipv4/netfilter/ipt_SYNPROXY.c | 3 +-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 +-- net/ipv4/netfilter/nf_dup_ipv4.c | 3 +-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 3 +-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++---- net/ipv6/netfilter/nf_dup_ipv6.c | 3 +-- net/netfilter/nf_conntrack_core.c | 11 +++-------- net/netfilter/nft_ct.c | 3 +-- net/netfilter/xt_CT.c | 6 ++---- net/openvswitch/conntrack.c | 6 ++---- 12 files changed, 24 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2a344ebd7ebe..4b46c591b542 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1559,8 +1559,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) nf_conntrack_put(&ct->ct_general); untracked = nf_ct_untracked_get(); nf_conntrack_get(&untracked->ct_general); - skb->nfct = &untracked->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, untracked, IP_CT_NEW); } #endif } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5916aa9ab3f0..d704aed11684 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -34,6 +34,7 @@ union nf_conntrack_proto { struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_gre gre; + unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { @@ -341,6 +342,13 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +static inline void +nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) +{ + skb->nfct = &ct->ct_general; + skb->nfctinfo = info; +} + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) 
this_cpu_add((net)->ct.stat->count, (v)) diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index a12d4f0aa674..3240a2614e82 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -57,8 +57,7 @@ synproxy_send_tcp(struct net *net, goto free_nskb; if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 478a025909fc..73c591d8a9a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -172,8 +172,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 1a5e1f53ceaa..f0dbff05fc28 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. 
*/ nf_reset(skb); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); #endif /* diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2dc01d2c6ec0..4ef1ddd4bbbd 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -71,8 +71,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set(nskb, dst); if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 09f1661a4e88..d2c2ccbfbe72 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -189,8 +189,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, } /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } @@ -222,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 5f52e5f90e7e..ff04f6a7f45b 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); 
nf_conntrack_get(skb->nfct); #endif if (hooknum == NF_INET_PRE_ROUTING || diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 78aebf0ee6e3..c9bd10747864 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -691,10 +691,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_conntrack_put(&loser_ct->ct_general); - /* Assign conntrack already in hashes to this skbuff. Don't - * modify skb->nfctinfo to ensure consistent stateful filtering. - */ - skb->nfct = &ct->ct_general; + nf_ct_set(skb, ct, oldinfo); return NF_ACCEPT; } NF_CT_STAT_INC(net, drop); @@ -1282,8 +1279,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } *set_reply = 0; } - skb->nfct = &ct->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, ct, *ctinfo); return ct; } @@ -1526,8 +1522,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ - nskb->nfct = &ct->ct_general; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, ct, ctinfo); nf_conntrack_get(skb_nfct(nskb)); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d774d7823688..66a2377510e1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -554,8 +554,7 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); } static struct nft_expr_type nft_notrack_type; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index cd7e29910ae1..51f00e1e1208 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -30,8 +30,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (!ct) ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = 
IP_CT_NEW; + nf_ct_set(skb, ct, IP_CT_NEW); return XT_CONTINUE; } @@ -413,8 +412,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb->nfct != NULL) return XT_CONTINUE; - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); nf_conntrack_get(skb_nfct(skb)); return XT_CONTINUE; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 452557946147..d1fbfcaa009a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -460,8 +460,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); - skb->nfct = &ct->ct_general; - skb->nfctinfo = ovs_ct_get_info(h); + nf_ct_set(skb, ct, ovs_ct_get_info(h)); return ct; } @@ -724,8 +723,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, if (skb_nfct(skb)) nf_conntrack_put(skb_nfct(skb)); nf_conntrack_get(&tmpl->ct_general); - skb->nfct = &tmpl->ct_general; - skb->nfctinfo = IP_CT_NEW; + nf_ct_set(skb, tmpl, IP_CT_NEW); } err = nf_conntrack_in(net, info->family, -- cgit v1.2.3 From 303223092081963513494b4377fa1ac9e362ed4b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:58 +0100 Subject: netfilter: guarantee 8 byte minalign for template addresses The next change will merge skb->nfct pointer and skb->nfctinfo status bits into single skb->_nfct (unsigned long) area. For this to work nf_conn addresses must always be aligned at least on an 8 byte boundary since we will need the lower 3bits to store nfctinfo. Conntrack templates are allocated via kmalloc. kbuild test robot reported BUILD_BUG_ON failed: NFCT_INFOMASK >= ARCH_KMALLOC_MINALIGN on v1 of this patchset, so not all platforms meet this requirement. Do manual alignment if needed, the alignment offset is stored in the nf_conn entry protocol area. This works because templates are not handed off to L4 protocol trackers. 
Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 29 ++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d704aed11684..06d3d2d24fe0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -163,6 +163,8 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +#define NFCT_INFOMASK 7UL + /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c9bd10747864..768968fba7f6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -350,16 +350,31 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) + /* Released via destroy_conntrack() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) { - struct nf_conn *tmpl; + struct nf_conn *tmpl, *p; - tmpl = kzalloc(sizeof(*tmpl), flags); - if (tmpl == NULL) - return NULL; + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { + tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); + if (!tmpl) + return NULL; + + p = tmpl; + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + if (tmpl != p) { + tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; + } + } else { + tmpl = kzalloc(sizeof(*tmpl), flags); + if (!tmpl) + return NULL; + } tmpl->status = IPS_TEMPLATE; 
write_pnet(&tmpl->ct_net, net); @@ -374,7 +389,11 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl) { nf_ct_ext_destroy(tmpl); nf_ct_ext_free(tmpl); - kfree(tmpl); + + if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) + kfree((char *)tmpl - tmpl->proto.tmpl_padto); + else + kfree(tmpl); } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); -- cgit v1.2.3 From a9e419dc7be6997409dca6d1b9daf3cc7046902f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:59 +0100 Subject: netfilter: merge ctinfo into nfct pointer storage area After this change conntrack operations (lookup, creation, matching from ruleset) only access one instead of two sk_buff cache lines. This works for normal conntracks because those are allocated from a slab that guarantees hw cacheline or 8byte alignment (whatever is larger) so the 3 bits needed for ctinfo won't overlap with nf_conn addresses. Template allocation now does manual address alignment (see previous change) on arches that don't have sufficient kmalloc min alignment. Some spots intentionally use skb->_nfct instead of skb_nfct() helpers, this is to avoid undoing the skb_nfct() use when we remove untracked conntrack object in the future. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 21 +++++++++------------ include/net/netfilter/nf_conntrack.h | 11 ++++++----- net/ipv6/netfilter/nf_dup_ipv6.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nf_conntrack_core.c | 11 ++++++----- net/netfilter/nf_conntrack_standalone.c | 3 +++ net/netfilter/xt_CT.c | 4 ++-- 7 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 276431e047af..ac0bc085b139 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header - * @nfctinfo: Relationship of this skb to the connection * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs @@ -594,7 +593,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function - * @nfct: Associated connection, if any + * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -668,7 +667,7 @@ struct sk_buff { struct sec_path *sp; #endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; + unsigned long _nfct; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info *nf_bridge; @@ -721,7 +720,6 @@ struct sk_buff { __u8 pkt_type:3; __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nfctinfo:3; __u8 nf_trace:1; __u8 ip_summed:2; @@ -836,6 +834,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) #define SKB_DST_NOREF 1UL #define 
SKB_DST_PTRMASK ~(SKB_DST_NOREF) +#define SKB_NFCT_PTRMASK ~(7UL) /** * skb_dst - returns skb dst_entry * @skb: buffer @@ -3556,7 +3555,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return skb->nfct; + return (void *)(skb->_nfct & SKB_NFCT_PTRMASK); #else return NULL; #endif @@ -3590,8 +3589,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) static inline void nf_reset(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put(skb_nfct(skb)); + skb->_nfct = 0; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); @@ -3611,10 +3610,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - dst->nfct = src->nfct; - nf_conntrack_get(src->nfct); - if (copy) - dst->nfctinfo = src->nfctinfo; + dst->_nfct = src->_nfct; + nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; @@ -3629,7 +3626,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(dst->nfct); + nf_conntrack_put(skb_nfct(dst)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 06d3d2d24fe0..f540f9ad2af4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,7 +76,7 @@ struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * - * Hint, 
SKB address this struct and refcnt via skb->nfct and + * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * beware nf_ct_get() is different and don't inc refcnt. @@ -164,13 +164,15 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); #define NFCT_INFOMASK 7UL +#define NFCT_PTRMASK ~(NFCT_INFOMASK) /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - *ctinfo = skb->nfctinfo; - return (struct nf_conn *)skb->nfct; + *ctinfo = skb->_nfct & NFCT_INFOMASK; + + return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ @@ -347,8 +349,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { - skb->nfct = &ct->ct_general; - skb->nfctinfo = info; + skb->_nfct = (unsigned long)ct | info; } #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index ff04f6a7f45b..888ecd106e5f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -59,7 +59,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb->nfct); + nf_conntrack_get(skb_nfct(skb)); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ce6adfae521a..a87a6f8a74d8 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -375,7 +375,7 @@ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { void (*attach)(struct sk_buff 
*, const struct sk_buff *); - if (skb->nfct) { + if (skb->_nfct) { rcu_read_lock(); attach = rcu_dereference(ip_ct_attach); if (attach) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 768968fba7f6..47c4ea53daa6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1239,7 +1239,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } -/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */ static inline struct nf_conn * resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -1323,7 +1323,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } - skb->nfct = NULL; + skb->_nfct = 0; } /* rcu_read_lock()ed by nf_hook_thresh */ @@ -1352,7 +1352,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } /* ICMP[v6] protocol trackers may assign one conntrack. 
*/ - if (skb->nfct) + if (skb->_nfct) goto out; } repeat: @@ -1383,7 +1383,7 @@ repeat: * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(&ct->ct_general); - skb->nfct = NULL; + skb->_nfct = 0; NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); @@ -1878,7 +1878,8 @@ int nf_conntrack_init_start(void) nf_conntrack_max = max_factor * nf_conntrack_htable_size; nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), 0, + sizeof(struct nf_conn), + NFCT_INFOMASK + 1, SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index d009ae663453..2256147dcaad 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -642,6 +642,9 @@ static int __init nf_conntrack_standalone_init(void) if (ret < 0) goto out_start; + BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK); + BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); + #ifdef CONFIG_SYSCTL nf_ct_netfilter_header = register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 51f00e1e1208..b008db0184b8 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -23,7 +23,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; /* special case the untracked ct : we want the percpu object */ @@ -409,7 +409,7 @@ static unsigned int notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) { /* Previously seen (loopback)? Ignore. 
*/ - if (skb->nfct != NULL) + if (skb->_nfct != 0) return XT_CONTINUE; nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); -- cgit v1.2.3 From 90c1aff702d449a1a248c4829d51c0bc677f968e Mon Sep 17 00:00:00 2001 From: David Windsor Date: Mon, 23 Jan 2017 22:24:29 -0500 Subject: ipvs: free ip_vs_dest structs when refcnt=0 Currently, the ip_vs_dest cache frees ip_vs_dest objects when their reference count becomes < 0. Aside from not being semantically sound, this is problematic for the new type refcount_t, which will be introduced shortly in a separate patch. refcount_t is the new kernel type for holding reference counts, and provides overflow protection and a constrained interface relative to atomic_t (the type currently being used for kernel reference counts). Per Julian Anastasov: "The problem is that dest_trash currently holds deleted dests (unlinked from RCU lists) with refcnt=0." Changing dest_trash to hold dest with refcnt=1 will allow us to free ip_vs_dest structs when their refcnt=0, in ip_vs_dest_put_and_free(). 
Signed-off-by: David Windsor Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4b46c591b542..7bdfa7d78363 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1421,7 +1421,7 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest) static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) { - if (atomic_dec_return(&dest->refcnt) < 0) + if (atomic_dec_and_test(&dest->refcnt)) kfree(dest); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 55e0169caa4c..5fc4836e7c79 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -711,7 +711,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, dest->vport == svc->port))) { /* HIT */ list_del(&dest->t_list); - ip_vs_dest_hold(dest); goto out; } } @@ -741,7 +740,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 0, so we simply release them here. + * be 1, so we simply release them here. 
*/ static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { @@ -1080,11 +1079,10 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash without reference */ + /* dest lives in trash with reference */ list_add(&dest->t_list, &ipvs->dest_trash); dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); - ip_vs_dest_put(dest); } @@ -1160,7 +1158,7 @@ static void ip_vs_dest_trash_expire(unsigned long data) spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - if (atomic_read(&dest->refcnt) > 0) + if (atomic_read(&dest->refcnt) > 1) continue; if (dest->idle_start) { if (time_before(now, dest->idle_start + -- cgit v1.2.3 From 2851940ffee313e0ff12540a8e11a8c54dea9c65 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Tue, 31 Jan 2017 10:30:06 +0100 Subject: netfilter: allow logging from non-init namespaces Commit 69b34fb996b2 ("netfilter: xt_LOG: add net namespace support for xt_LOG") disabled logging packets using the LOG target from non-init namespaces. The motivation was to prevent containers from flooding kernel log of the host. The plan was to keep it that way until syslog namespace implementation allows containers to log in a safe way. However, the work on syslog namespace seems to have hit a dead end somewhere in 2013 and there are users who want to use xt_LOG in all network namespaces. This patch allows to do so by setting /proc/sys/net/netfilter/nf_log_all_netns to a nonzero value. This sysctl is only accessible from init_net so that one cannot switch the behaviour from inside a container. 
Signed-off-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/netfilter-sysctl.txt | 10 ++++++++++ include/net/netfilter/nf_log.h | 3 +++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- net/netfilter/nf_log.c | 24 ++++++++++++++++++++++++ 7 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 Documentation/networking/netfilter-sysctl.txt (limited to 'include/net') diff --git a/Documentation/networking/netfilter-sysctl.txt b/Documentation/networking/netfilter-sysctl.txt new file mode 100644 index 000000000000..55791e50e169 --- /dev/null +++ b/Documentation/networking/netfilter-sysctl.txt @@ -0,0 +1,10 @@ +/proc/sys/net/netfilter/* Variables: + +nf_log_all_netns - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + By default, only init_net namespace can log packets into kernel log + with LOG target; this aims to prevent containers from flooding host + kernel log. If enabled, this target also works in other network + namespaces. This variable is only accessible from init_net. diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 450f87f95415..42e0696f38d8 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -51,6 +51,9 @@ struct nf_logger { struct module *me; }; +/* sysctl_nf_log_all_netns - allow LOG target in all network namespaces */ +extern int sysctl_nf_log_all_netns; + /* Function to register/unregister log function. 
*/ int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e88bd4827ac1..98b9c8e8615e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,7 +78,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int bitmask; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; spin_lock_bh(&ebt_log_lock); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index b24795e2ee6d..f6f713376e6e 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -87,7 +87,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 856648966f4c..c83a9963269b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -319,7 +319,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 57d86066a13b..055c51b80f5d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -351,7 +351,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, 
&init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3dca90dc24ad..0a034f52b912 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,6 +16,9 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 +int sysctl_nf_log_all_netns __read_mostly; +EXPORT_SYMBOL(sysctl_nf_log_all_netns); + static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -414,6 +417,18 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table_header *nf_log_sysctl_fhdr; + +static struct ctl_table nf_log_sysctl_ftable[] = { + { + .procname = "nf_log_all_netns", + .data = &sysctl_nf_log_all_netns, + .maxlen = sizeof(sysctl_nf_log_all_netns), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -483,6 +498,10 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; } + nf_log_sysctl_fhdr = register_net_sysctl(net, "net/netfilter", + nf_log_sysctl_ftable); + if (!nf_log_sysctl_fhdr) + goto err_freg; } for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) @@ -499,6 +518,9 @@ static int netfilter_log_sysctl_init(struct net *net) err_reg: if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); +err_freg: err_alloc: return -ENOMEM; } @@ -511,6 +533,8 @@ static void netfilter_log_sysctl_exit(struct net *net) unregister_net_sysctl_table(net->nf.nf_log_dir_header); if (!net_eq(net, &init_net)) kfree(table); + else + unregister_net_sysctl_table(nf_log_sysctl_fhdr); } #else static int netfilter_log_sysctl_init(struct net *net) 
-- cgit v1.2.3 From 1d5e7c859e81a66674d194c346119d154d31e9dc Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 1 Feb 2017 15:30:01 +0200 Subject: net/sched: act_ife: Unexport ife_tlv_meta_encode As the function ife_tlv_meta_encode is not used by any other module, unexport it and make it static for the act_ife module. Signed-off-by: Yotam Gigi Signed-off-by: Jamal Hadi Salim Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 2 -- net/sched/act_ife.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 9fd2bea0a6e0..f37e7516ab28 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -45,8 +45,6 @@ struct tcf_meta_ops { int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); -int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, - const void *dval); int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 921fb20eaa7c..70148c10ede9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -48,7 +48,8 @@ static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = { /* Caller takes care of presenting data in network order */ -int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) +static int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, + const void *dval) { u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ @@ -61,7 +62,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) return totlen; } -EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); int ife_encode_meta_u16(u16 metaval, void *skbdata, 
struct tcf_meta_info *mi) { -- cgit v1.2.3 From 1ce8460496c05379c66edc178c3c55ca4e953044 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 1 Feb 2017 15:30:02 +0200 Subject: net: Introduce ife encapsulation module This module is responsible for the ife encapsulation protocol encode/decode logics. That module can: - ife_encode: encode skb and reserve space for the ife meta header - ife_decode: decode skb and extract the meta header size - ife_tlv_meta_encode - encodes one tlv entry into the reserved ife header space. - ife_tlv_meta_decode - decodes one tlv entry from the packet - ife_tlv_meta_next - advance to the next tlv Reviewed-by: Jiri Pirko Signed-off-by: Yotam Gigi Signed-off-by: Jamal Hadi Salim Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- MAINTAINERS | 7 +++ include/net/ife.h | 51 +++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/ife.h | 18 ++++++ net/Kconfig | 1 + net/Makefile | 1 + net/ife/Kconfig | 16 ++++++ net/ife/Makefile | 5 ++ net/ife/ife.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 242 insertions(+) create mode 100644 include/net/ife.h create mode 100644 include/uapi/linux/ife.h create mode 100644 net/ife/Kconfig create mode 100644 net/ife/Makefile create mode 100644 net/ife/ife.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index 5e637e2b3ff9..2abda6cb3150 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6250,6 +6250,13 @@ F: include/net/cfg802154.h F: include/net/ieee802154_netdev.h F: Documentation/networking/ieee802154.txt +IFE PROTOCOL +M: Yotam Gigi +M: Jamal Hadi Salim +F: net/ife +F: include/net/ife.h +F: include/uapi/linux/ife.h + IGORPLUG-USB IR RECEIVER M: Sean Young L: linux-media@vger.kernel.org diff --git a/include/net/ife.h b/include/net/ife.h new file mode 100644 index 000000000000..2d87d6898b0a --- /dev/null +++ b/include/net/ife.h @@ -0,0 +1,51 @@ +#ifndef __NET_IFE_H +#define __NET_IFE_H + +#include +#include +#include +#include + +#if 
IS_ENABLED(CONFIG_NET_IFE) + +void *ife_encode(struct sk_buff *skb, u16 metalen); +void *ife_decode(struct sk_buff *skb, u16 *metalen); + +void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen); +int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, + const void *dval); + +void *ife_tlv_meta_next(void *skbdata); + +#else + +static inline void *ife_encode(struct sk_buff *skb, u16 metalen) +{ + return NULL; +} + +static inline void *ife_decode(struct sk_buff *skb, u16 *metalen) +{ + return NULL; +} + +static inline void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, + u16 *totlen) +{ + return NULL; +} + +static inline int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, + const void *dval) +{ + return 0; +} + +static inline void *ife_tlv_meta_next(void *skbdata) +{ + return NULL; +} + +#endif + +#endif /* __NET_IFE_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 486e050e64c5..a2e90722a4c4 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -195,6 +195,7 @@ header-y += if_tun.h header-y += if_tunnel.h header-y += if_vlan.h header-y += if_x25.h +header-y += ife.h header-y += igmp.h header-y += ila.h header-y += in6.h diff --git a/include/uapi/linux/ife.h b/include/uapi/linux/ife.h new file mode 100644 index 000000000000..2954da32e012 --- /dev/null +++ b/include/uapi/linux/ife.h @@ -0,0 +1,18 @@ +#ifndef __UAPI_IFE_H +#define __UAPI_IFE_H + +#define IFE_METAHDRLEN 2 + +enum { + IFE_META_SKBMARK = 1, + IFE_META_HASHID, + IFE_META_PRIO, + IFE_META_QMAP, + IFE_META_TCINDEX, + __IFE_META_MAX +}; + +/*Can be overridden at runtime by module option*/ +#define IFE_META_MAX (__IFE_META_MAX - 1) + +#endif diff --git a/net/Kconfig b/net/Kconfig index ce4aee69fc0d..2f2842d2d3ed 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -391,6 +391,7 @@ source "net/caif/Kconfig" source "net/ceph/Kconfig" source "net/nfc/Kconfig" source "net/psample/Kconfig" +source "net/ife/Kconfig" 
config LWTUNNEL bool "Network light weight tunnels" diff --git a/net/Makefile b/net/Makefile index 7d41de48310e..9b681550e3a3 100644 --- a/net/Makefile +++ b/net/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_PSAMPLE) += psample/ +obj-$(CONFIG_NET_IFE) += ife/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ diff --git a/net/ife/Kconfig b/net/ife/Kconfig new file mode 100644 index 000000000000..31e48b652c7c --- /dev/null +++ b/net/ife/Kconfig @@ -0,0 +1,16 @@ +# +# IFE subsystem configuration +# + +menuconfig NET_IFE + depends on NET + tristate "Inter-FE based on IETF ForCES InterFE LFB" + default n + help + Say Y here to add support of IFE encapsulation protocol + For details refer to netdev01 paper: + "Distributing Linux Traffic Control Classifier-Action Subsystem" + Authors: Jamal Hadi Salim and Damascene M. Joachimpillai + + To compile this support as a module, choose M here: the module will + be called ife. diff --git a/net/ife/Makefile b/net/ife/Makefile new file mode 100644 index 000000000000..2a90d97746cc --- /dev/null +++ b/net/ife/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the IFE encapsulation protocol +# + +obj-$(CONFIG_NET_IFE) += ife.o diff --git a/net/ife/ife.c b/net/ife/ife.c new file mode 100644 index 000000000000..f360341c72eb --- /dev/null +++ b/net/ife/ife.c @@ -0,0 +1,142 @@ +/* + * net/ife/ife.c - Inter-FE protocol based on ForCES WG InterFE LFB + * Copyright (c) 2015 Jamal Hadi Salim + * Copyright (c) 2017 Yotam Gigi + * + * Refer to: draft-ietf-forces-interfelfb-03 and netdev01 paper: + * "Distributing Linux Traffic Control Classifier-Action Subsystem" + * Authors: Jamal Hadi Salim and Damascene M. Joachimpillai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ifeheadr { + __be16 metalen; + u8 tlv_data[]; +}; + +void *ife_encode(struct sk_buff *skb, u16 metalen) +{ + /* OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA + * where ORIGDATA = original ethernet header ... + */ + int hdrm = metalen + IFE_METAHDRLEN; + int total_push = hdrm + skb->dev->hard_header_len; + struct ifeheadr *ifehdr; + struct ethhdr *iethh; /* inner ether header */ + int skboff = 0; + int err; + + err = skb_cow_head(skb, total_push); + if (unlikely(err)) + return NULL; + + iethh = (struct ethhdr *) skb->data; + + __skb_push(skb, total_push); + memcpy(skb->data, iethh, skb->dev->hard_header_len); + skb_reset_mac_header(skb); + skboff += skb->dev->hard_header_len; + + /* total metadata length */ + ifehdr = (struct ifeheadr *) (skb->data + skboff); + metalen += IFE_METAHDRLEN; + ifehdr->metalen = htons(metalen); + + return ifehdr->tlv_data; +} +EXPORT_SYMBOL_GPL(ife_encode); + +void *ife_decode(struct sk_buff *skb, u16 *metalen) +{ + struct ifeheadr *ifehdr; + int total_pull; + u16 ifehdrln; + + ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len); + ifehdrln = ntohs(ifehdr->metalen); + total_pull = skb->dev->hard_header_len + ifehdrln; + + if (unlikely(ifehdrln < 2)) + return NULL; + + if (unlikely(!pskb_may_pull(skb, total_pull))) + return NULL; + + skb_set_mac_header(skb, total_pull); + __skb_pull(skb, total_pull); + *metalen = ifehdrln - IFE_METAHDRLEN; + + return &ifehdr->tlv_data; +} +EXPORT_SYMBOL_GPL(ife_decode); + +struct meta_tlvhdr { + __be16 type; + __be16 len; +}; + +/* Caller takes care of presenting data in network order + */ +void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen) +{ + struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata; + + *dlen = ntohs(tlv->len) - NLA_HDRLEN; + *attrtype = ntohs(tlv->type); + + if (totlen) + *totlen = 
nla_total_size(*dlen); + + return skbdata + sizeof(struct meta_tlvhdr); +} +EXPORT_SYMBOL_GPL(ife_tlv_meta_decode); + +void *ife_tlv_meta_next(void *skbdata) +{ + struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata; + u16 tlvlen = ntohs(tlv->len); + + tlvlen = NLA_ALIGN(tlvlen); + + return skbdata + tlvlen; +} +EXPORT_SYMBOL_GPL(ife_tlv_meta_next); + +/* Caller takes care of presenting data in network order + */ +int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) +{ + __be32 *tlv = (__be32 *) (skbdata); + u16 totlen = nla_total_size(dlen); /*alignment + hdr */ + char *dptr = (char *) tlv + NLA_HDRLEN; + u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); + + *tlv = htonl(htlv); + memset(dptr, 0, totlen - NLA_HDRLEN); + memcpy(dptr, dval, dlen); + + return totlen; +} +EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); + +MODULE_AUTHOR("Jamal Hadi Salim "); +MODULE_AUTHOR("Yotam Gigi "); +MODULE_DESCRIPTION("Inter-FE LFB action"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 295a6e06d21e1f469c9f38b00125a13b60ad4e7c Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 1 Feb 2017 15:30:03 +0200 Subject: net/sched: act_ife: Change to use ife module Use the encode/decode functionality from the ife module instead of using implementation inside the act_ife. Reviewed-by: Jiri Pirko Signed-off-by: Yotam Gigi Signed-off-by: Jamal Hadi Salim Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_ife.h | 1 - include/uapi/linux/tc_act/tc_ife.h | 10 +--- net/sched/Kconfig | 1 + net/sched/act_ife.c | 110 +++++++++++-------------------------- 4 files changed, 34 insertions(+), 88 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index f37e7516ab28..30ba459ddd34 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -6,7 +6,6 @@ #include #include -#define IFE_METAHDRLEN 2 struct tcf_ife_info { struct tc_action common; u8 eth_dst[ETH_ALEN]; diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index cd18360eca24..7c2817866c97 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -3,6 +3,7 @@ #include #include +#include #define TCA_ACT_IFE 25 /* Flag bits for now just encoding/decoding; mutually exclusive */ @@ -28,13 +29,4 @@ enum { }; #define TCA_IFE_MAX (__TCA_IFE_MAX - 1) -#define IFE_META_SKBMARK 1 -#define IFE_META_HASHID 2 -#define IFE_META_PRIO 3 -#define IFE_META_QMAP 4 -#define IFE_META_TCINDEX 5 -/*Can be overridden at runtime by module option*/ -#define __IFE_META_MAX 6 -#define IFE_META_MAX (__IFE_META_MAX - 1) - #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 72cfa3a6bac0..403790cce7d2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -776,6 +776,7 @@ config NET_ACT_SKBMOD config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT + select NET_IFE ---help--- Say Y here to allow for sourcing and terminating metadata For details refer to netdev01 paper: diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 70148c10ede9..71e7ff22f7c9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -32,6 +32,7 @@ #include #include #include +#include #define IFE_TAB_MASK 15 @@ -46,23 +47,6 @@ static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = { [TCA_IFE_TYPE] = { .type = NLA_U16}, }; -/* 
Caller takes care of presenting data in network order -*/ -static int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, - const void *dval) -{ - u32 *tlv = (u32 *)(skbdata); - u16 totlen = nla_total_size(dlen); /*alignment + hdr */ - char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); - - *tlv = htonl(htlv); - memset(dptr, 0, totlen - NLA_HDRLEN); - memcpy(dptr, dval, dlen); - - return totlen; -} - int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) { u16 edata = 0; @@ -637,69 +621,59 @@ int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, return 0; } -struct ifeheadr { - __be16 metalen; - u8 tlv_data[]; -}; - -struct meta_tlvhdr { - __be16 type; - __be16 len; -}; - static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; - struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - int ifehdrln = (int)ifehdr->metalen; - struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); + u8 *ifehdr_end; + u8 *tlv_data; + u16 metalen; spin_lock(&ife->tcf_lock); bstats_update(&ife->tcf_bstats, skb); tcf_lastuse_update(&ife->tcf_tm); spin_unlock(&ife->tcf_lock); - ifehdrln = ntohs(ifehdrln); - if (unlikely(!pskb_may_pull(skb, ifehdrln))) { + if (skb_at_tc_ingress(skb)) + skb_push(skb, skb->dev->hard_header_len); + + tlv_data = ife_decode(skb, &metalen); + if (unlikely(!tlv_data)) { spin_lock(&ife->tcf_lock); ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); return TC_ACT_SHOT; } - skb_set_mac_header(skb, ifehdrln); - __skb_pull(skb, ifehdrln); - skb->protocol = eth_type_trans(skb, skb->dev); - ifehdrln -= IFE_METAHDRLEN; - - while (ifehdrln > 0) { - u8 *tlvdata = (u8 *)tlv; - u16 mtype = tlv->type; - u16 mlen = tlv->len; - u16 alen; + ifehdr_end = tlv_data + metalen; + for (; tlv_data < ifehdr_end; tlv_data = ife_tlv_meta_next(tlv_data)) { + u8 *curr_data; + u16 
mtype; + u16 dlen; - mtype = ntohs(mtype); - mlen = ntohs(mlen); - alen = NLA_ALIGN(mlen); + curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL); - if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN), - (void *)(tlvdata + NLA_HDRLEN))) { + if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ - pr_info_ratelimited("Unknown metaid %d alnlen %d\n", - mtype, mlen); + pr_info_ratelimited("Unknown metaid %d dlen %d\n", + mtype, dlen); ife->tcf_qstats.overlimits++; } + } - tlvdata += alen; - ifehdrln -= alen; - tlv = (struct meta_tlvhdr *)tlvdata; + if (WARN_ON(tlv_data != ifehdr_end)) { + spin_lock(&ife->tcf_lock); + ife->tcf_qstats.drops++; + spin_unlock(&ife->tcf_lock); + return TC_ACT_SHOT; } + skb->protocol = eth_type_trans(skb, skb->dev); skb_reset_network_header(skb); + return action; } @@ -727,7 +701,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ethhdr *oethh; /* outer ether header */ - struct ethhdr *iethh; /* inner eth header */ struct tcf_meta_info *e; /* OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA @@ -735,10 +708,11 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, */ u16 metalen = ife_get_sz(skb, ife); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; - unsigned int skboff = skb->dev->hard_header_len; + unsigned int skboff = 0; int new_len = skb->len + hdrm; bool exceed_mtu = false; - int err; + void *ife_meta; + int err = 0; if (!skb_at_tc_ingress(skb)) { if (new_len > skb->dev->mtu) @@ -765,27 +739,10 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - err = skb_cow_head(skb, hdrm); - if (unlikely(err)) { - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); - return TC_ACT_SHOT; - } - if (skb_at_tc_ingress(skb)) skb_push(skb, 
skb->dev->hard_header_len); - iethh = (struct ethhdr *)skb->data; - __skb_push(skb, hdrm); - memcpy(skb->data, iethh, skb->mac_len); - skb_reset_mac_header(skb); - oethh = eth_hdr(skb); - - /*total metadata length */ - metalen += IFE_METAHDRLEN; - metalen = htons(metalen); - memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN); - skboff += IFE_METAHDRLEN; + ife_meta = ife_encode(skb, metalen); /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by @@ -793,7 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, */ list_for_each_entry(e, &ife->metalist, metalist) { if (e->ops->encode) { - err = e->ops->encode(skb, (void *)(skb->data + skboff), + err = e->ops->encode(skb, (void *)(ife_meta + skboff), e); } if (err < 0) { @@ -804,15 +761,12 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } skboff += err; } + oethh = (struct ethhdr *)skb->data; if (!is_zero_ether_addr(ife->eth_src)) ether_addr_copy(oethh->h_source, ife->eth_src); - else - ether_addr_copy(oethh->h_source, iethh->h_source); if (!is_zero_ether_addr(ife->eth_dst)) ether_addr_copy(oethh->h_dest, ife->eth_dst); - else - ether_addr_copy(oethh->h_dest, iethh->h_dest); oethh->h_proto = htons(ife->eth_type); if (skb_at_tc_ingress(skb)) -- cgit v1.2.3 From f35581d64e55fc65753a62957b3b98127d560d07 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:51 -0800 Subject: ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info mode New ip_tunnel_info flag to represent bridged tunnel metadata. Used by bridge driver later in the series to pass per vlan dst metadata to bridge ports. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 3d4ca4df1209..95056796657c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -58,6 +58,7 @@ struct ip_tunnel_key { /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ +#define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ -- cgit v1.2.3 From 69ca05ce9dec2cc95070df7f1f10ea6c9c12d237 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Feb 2017 10:29:08 +0100 Subject: sched: cls_flower: expose priority to offloading netdevice The driver that offloads flower rules needs to know the priority with which the user inserted the rules, so add this information to the offload struct. Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S.
Miller --- include/net/pkt_cls.h | 1 + net/sched/cls_flower.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b43077e47d35..dabb00af46a0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -481,6 +481,7 @@ enum tc_fl_command { struct tc_cls_flower_offload { enum tc_fl_command command; + u32 prio; unsigned long cookie; struct flow_dissector *dissector; struct fl_flow_key *mask; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 23c4d224dcb1..0826c8ec3a76 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -229,6 +229,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) return; offload.command = TC_CLSFLOWER_DESTROY; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; tc->type = TC_SETUP_CLSFLOWER; @@ -260,6 +261,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, } offload.command = TC_CLSFLOWER_REPLACE; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; offload.dissector = dissector; offload.mask = mask; @@ -287,6 +289,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) return; offload.command = TC_CLSFLOWER_STATS; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; offload.exts = &f->exts; -- cgit v1.2.3 From 0ae8133586ad1c9be894411aaf8b17bb58c8efe5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 2 Feb 2017 12:37:08 -0800 Subject: net: ipv6: Allow shorthand delete of all nexthops in multipath route IPv4 allows multipath routes to be deleted using just the prefix and length. 
For example: $ ip ro ls vrf red unreachable default metric 8192 1.1.1.0/24 nexthop via 10.100.1.254 dev eth1 weight 1 nexthop via 10.11.200.2 dev eth11.200 weight 1 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 $ ip ro del 1.1.1.0/24 vrf red $ ip ro ls vrf red unreachable default metric 8192 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 The same notation does not work with IPv6 because of how multipath routes are implemented for IPv6. For IPv6 only the first nexthop of a multipath route is deleted if the request contains only a prefix and length. This leads to unnecessary complexity in userspace dealing with IPv6 multipath routes. This patch allows all nexthops to be deleted without specifying each one in the delete request. Internally, this is done by walking the sibling list of the route matching the specifications given (prefix, length, metric, protocol, etc). $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium 2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024 pref medium 2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024 pref medium ... $ ip -6 ro del vrf red 2001:db8:200::/120 $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium ... Because IPv6 allows individual nexthops to be deleted without deleting the entire route, the ip6_route_multipath_del and non-multipath code path (ip6_route_del) have to be discriminated so that all nexthops are only deleted for the latter case. This is done by making the existing fc_type in fib6_config a u16 and then adding a new u16 field with fc_delete_all_nh as the first bit. Suggested-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 4 +++- net/ipv6/route.c | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a74e2aa40ef4..c979c878df1c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -37,7 +37,9 @@ struct fib6_config { int fc_ifindex; u32 fc_flags; u32 fc_protocol; - u32 fc_type; /* only 8 bits are used */ + u16 fc_type; /* only 8 bits are used */ + u16 fc_delete_all_nh : 1, + __unused : 15; struct in6_addr fc_dst; struct in6_addr fc_src; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 91eb3f7782dd..635b7fdef2eb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2143,6 +2143,34 @@ int ip6_del_rt(struct rt6_info *rt) return __ip6_del_rt(rt, &info); } +static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +{ + struct nl_info *info = &cfg->fc_nlinfo; + struct fib6_table *table; + int err; + + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + + if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { + struct rt6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, + rt6i_siblings) { + err = fib6_del(sibling, info); + if (err) + goto out; + } + } + + err = fib6_del(rt, info); +out: + write_unlock_bh(&table->tb6_lock); + ip6_rt_put(rt); + return err; +} + static int ip6_route_del(struct fib6_config *cfg) { struct fib6_table *table; @@ -2179,7 +2207,11 @@ static int ip6_route_del(struct fib6_config *cfg) dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - return __ip6_del_rt(rt, &cfg->fc_nlinfo); + /* if gateway was specified only delete the one hop */ + if (cfg->fc_flags & RTF_GATEWAY) + return __ip6_del_rt(rt, &cfg->fc_nlinfo); + + return __ip6_del_rt_siblings(rt, cfg); } } read_unlock_bh(&table->tb6_lock); @@ -3142,8 +3174,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (cfg.fc_mp) 
return ip6_route_multipath_del(&cfg); - else + else { + cfg.fc_delete_all_nh = 1; return ip6_route_del(&cfg); + } } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) -- cgit v1.2.3 From 3b1137fe74829e021f483756a648cbb87c8a1b4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 2 Feb 2017 12:37:10 -0800 Subject: net: ipv6: Change notifications for multipath add to RTA_MULTIPATH Change ip6_route_multipath_add to send one notification with the full route encoded with RTA_MULTIPATH instead of a series of individual routes. This is done by adding a skip_notify flag to the nl_info struct. The flag is used to skip sending of the notification in the fib code that actually inserts the route. Once the full route has been added, a notification is generated with all nexthops. ip6_route_multipath_add handles 3 use cases: new routes, route replace, and route append. The multipath notification generated needs to be consistent with the order of the nexthops and it should be consistent with the order in a FIB dump which means the route with the first nexthop needs to be used as the route reference. For the first 2 cases (new and replace), a reference to the route used to send the notification is obtained by saving the first route added. For the append case, the last route added is used to loop back to its first sibling route which is the first nexthop in the multipath route. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/netlink.h | 1 + net/ipv6/ip6_fib.c | 6 ++++-- net/ipv6/route.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index d3938f11ae52..b239fcd33d80 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -229,6 +229,7 @@ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; + bool skip_notify; }; int netlink_rcv_skb(struct sk_buff *skb, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bf5e22fb95d..99c68ce6ef78 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -881,7 +881,8 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { @@ -907,7 +908,8 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c740d9e249a6..cb3366d5e165 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3023,13 +3023,37 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } +static void ip6_route_mpath_notify(struct rt6_info *rt, + struct rt6_info *rt_last, + struct nl_info *info, + __u16 nlflags) +{ + /* if this is an APPEND route, then rt points to the first route + * inserted and rt_last points to last route inserted. Userspace + * wants a consistent dump of the route which starts at the first + * nexthop. 
Since sibling routes are always added at the end of + * the list, find the first sibling of the last route appended + */ + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { + rt = list_first_entry(&rt_last->rt6i_siblings, + struct rt6_info, + rt6i_siblings); + } + + if (rt) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); +} + static int ip6_route_multipath_add(struct fib6_config *cfg) { + struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct rt6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; + __u16 nlflags; int remaining; int attrlen; int err = 1; @@ -3038,6 +3062,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); LIST_HEAD(rt6_nh_list); + nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; + if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) + nlflags |= NLM_F_APPEND; + remaining = cfg->fc_mp_len; rtnh = (struct rtnexthop *)cfg->fc_mp; @@ -3080,9 +3108,20 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) rtnh = rtnh_next(rtnh, &remaining); } + /* for add and replace send one notification with all nexthops. + * Skip the notification in fib6_add_rt2node and send one with + * the full route when done + */ + info->skip_notify = 1; + err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); + rt_last = nh->rt6_info; + err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc); + /* save reference to first route for notification */ + if (!rt_notif && !err) + rt_notif = nh->rt6_info; + /* nh->rt6_info is used or freed at this point, reset to NULL*/ nh->rt6_info = NULL; if (err) { @@ -3104,9 +3143,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) nhn++; } + /* success ... 
tell user about new route */ + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; add_errout: + /* send notification for routes that were added so that + * the delete notifications sent by ip6_route_del are + * coherent + */ + if (rt_notif) + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); + /* Delete routes that were already added */ list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) -- cgit v1.2.3 From f515f192ab4f45bb695146b82432d63d98775787 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Feb 2017 13:20:20 -0500 Subject: net: dsa: add switch notifier Add a notifier block per DSA switch, registered against a notifier head in the switch fabric they belong to. This infrastructure will allow to propagate fabric-wide events such as port bridging, VLAN configuration, etc. If a DSA switch driver cares about cross-chip configuration, such events can be caught. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ net/dsa/Makefile | 1 + net/dsa/dsa.c | 6 ++++++ net/dsa/dsa2.c | 6 ++++++ net/dsa/dsa_priv.h | 4 ++++ net/dsa/switch.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 77 insertions(+) create mode 100644 net/dsa/switch.c (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2cb77e64d648..ac4ea7c3a102 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -92,6 +93,9 @@ struct packet_type; struct dsa_switch_tree { struct list_head list; + /* Notifier chain for switch-wide events */ + struct raw_notifier_head nh; + /* Tree identifier */ u32 tree; @@ -182,6 +186,9 @@ struct dsa_switch { struct dsa_switch_tree *dst; int index; + /* Listener for switch fabric events */ + struct notifier_block nb; + /* * Give the switch driver somewhere to hang its private data * structure. 
diff --git a/net/dsa/Makefile b/net/dsa/Makefile index a3380ed0e0be..72912982de3d 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,7 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o slave.o dsa2.o +dsa_core-y += dsa.o slave.o dsa2.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index beb79ccf0f59..22e44f691ab9 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -275,6 +275,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) return ret; + ret = dsa_switch_register_notifier(ds); + if (ret) + return ret; + if (ops->set_addr) { ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) @@ -400,6 +404,8 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); } #ifdef CONFIG_PM_SLEEP diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9f8cc26be9ea..1c546b6621ee 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,6 +294,10 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err < 0) return err; + err = dsa_switch_register_notifier(ds); + if (err) + return err; + if (ds->ops->set_addr) { err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) @@ -364,6 +368,8 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); } static int dsa_dst_apply(struct dsa_switch_tree *dst) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 591a40aea9ca..0706a511244e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -66,6 +66,10 @@ int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +/* switch.c */ +int 
dsa_switch_register_notifier(struct dsa_switch *ds); +void dsa_switch_unregister_notifier(struct dsa_switch *ds); + /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; diff --git a/net/dsa/switch.c b/net/dsa/switch.c new file mode 100644 index 000000000000..e22fa7633d03 --- /dev/null +++ b/net/dsa/switch.c @@ -0,0 +1,53 @@ +/* + * Handling of a single switch chip, part of a switch fabric + * + * Copyright (c) 2017 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include + +static int dsa_switch_event(struct notifier_block *nb, + unsigned long event, void *info) +{ + struct dsa_switch *ds = container_of(nb, struct dsa_switch, nb); + int err; + + switch (event) { + default: + err = -EOPNOTSUPP; + break; + } + + /* Non-switchdev operations cannot be rolled back. If a DSA driver + * returns an error during the chained call, switch chips may be in an + * inconsistent state. 
+ */ + if (err) + dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n", + event, err); + + return notifier_from_errno(err); +} + +int dsa_switch_register_notifier(struct dsa_switch *ds) +{ + ds->nb.notifier_call = dsa_switch_event; + + return raw_notifier_chain_register(&ds->dst->nh, &ds->nb); +} + +void dsa_switch_unregister_notifier(struct dsa_switch *ds) +{ + int err; + + err = raw_notifier_chain_unregister(&ds->dst->nh, &ds->nb); + if (err) + dev_err(ds->dev, "failed to unregister notifier (%d)\n", err); +} -- cgit v1.2.3 From 04d3a4c6af52a58370795bc9f70dc15f51f8bb84 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 3 Feb 2017 13:20:21 -0500 Subject: net: dsa: introduce bridge notifier A slave device will now notify the switch fabric once its port is bridged or unbridged, instead of calling directly its switch operations. This code allows propagating cross-chip bridging events in the fabric. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 10 ++++++++++ net/dsa/slave.c | 40 +++++++++++++++++++++++++++++----------- net/dsa/switch.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index ac4ea7c3a102..e9c940c8936f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -268,6 +268,16 @@ struct switchdev_obj_port_fdb; struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; +#define DSA_NOTIFIER_BRIDGE_JOIN 1 +#define DSA_NOTIFIER_BRIDGE_LEAVE 2 + +/* DSA_NOTIFIER_BRIDGE_* */ +struct dsa_notifier_bridge_info { + struct net_device *br; + int sw_index; + int port; +}; + struct dsa_switch_ops { /* * Probing and setup. 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d8c3c0f00cf3..061a49c29cef 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -27,6 +27,17 @@ static bool dsa_slave_dev_check(struct net_device *dev); +static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct raw_notifier_head *nh = &p->dp->ds->dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -562,39 +573,46 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int ret = -EOPNOTSUPP; + struct dsa_notifier_bridge_info info = { + .sw_index = p->dp->ds->index, + .port = p->dp->index, + .br = br, + }; + int err; /* Here the port is already bridged. Reflect the current configuration * so that drivers can program their chips accordingly. */ p->dp->bridge_dev = br; - if (ds->ops->port_bridge_join) - ret = ds->ops->port_bridge_join(ds, p->dp->index, br); + err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info); /* The bridging is rolled back on error */ - if (ret && ret != -EOPNOTSUPP) { + if (err) p->dp->bridge_dev = NULL; - return ret; - } - return 0; + return err; } static void dsa_slave_bridge_port_leave(struct net_device *dev, struct net_device *br) { struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_notifier_bridge_info info = { + .sw_index = p->dp->ds->index, + .port = p->dp->index, + .br = br, + }; + int err; /* Here the port is already unbridged. Reflect the current configuration * so that drivers can program their chips accordingly. 
*/ p->dp->bridge_dev = NULL; - if (ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, p->dp->index, br); + err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + if (err) + netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e22fa7633d03..6456dacf9ae9 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -13,6 +13,32 @@ #include #include +static int dsa_switch_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_bridge_join) + return ds->ops->port_bridge_join(ds, info->port, info->br); + + if (ds->index != info->sw_index) + dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n", + info->sw_index, info->port, netdev_name(info->br)); + + return 0; +} + +static int dsa_switch_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->port, info->br); + + if (ds->index != info->sw_index) + dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n", + info->sw_index, info->port, netdev_name(info->br)); + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -20,6 +46,12 @@ static int dsa_switch_event(struct notifier_block *nb, int err; switch (event) { + case DSA_NOTIFIER_BRIDGE_JOIN: + err = dsa_switch_bridge_join(ds, info); + break; + case DSA_NOTIFIER_BRIDGE_LEAVE: + err = dsa_switch_bridge_leave(ds, info); + break; default: err = -EOPNOTSUPP; break; -- cgit v1.2.3 From 14b89f36eed2993670906a3991bca496a5ebf1a6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 4 Feb 2017 13:02:42 -0800 Subject: net: dsa: Rename and export dev_to_net_device() In preparation for using this 
function in net/dsa/dsa2.c, rename the function to make its scope DSA specific, and export it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + net/dsa/dsa.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index e9c940c8936f..2a21fa80f898 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -445,6 +445,7 @@ struct dsa_switch_driver { void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); +struct net_device *dsa_dev_to_net_device(struct device *dev); static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22e44f691ab9..b6d4f6a23f06 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -492,7 +492,7 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); -static struct net_device *dev_to_net_device(struct device *dev) +struct net_device *dsa_dev_to_net_device(struct device *dev) { struct device *d; @@ -509,6 +509,7 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); #ifdef CONFIG_OF static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, @@ -817,7 +818,7 @@ static int dsa_probe(struct platform_device *pdev) dev = pd->of_netdev; dev_hold(dev); } else { - dev = dev_to_net_device(pd->netdev); + dev = dsa_dev_to_net_device(pd->netdev); } if (dev == NULL) { ret = -EPROBE_DEFER; -- cgit v1.2.3 From 71e0bbde0d88047f66b25721f69a441d46083748 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 4 Feb 2017 13:02:43 -0800 Subject: net: dsa: Add support for platform data Allow drivers to use the new DSA API with platform data. 
Most of the code in net/dsa/dsa2.c does not rely so much on device_nodes and can get the same information from platform_data instead. We purposely do not support distributed configurations with platform data, so drivers should be providing a pointer to a 'struct dsa_chip_data' structure if they wish to communicate per-port layout. Multiple CPUs port could potentially be supported and dsa_chip_data is extended to receive up to one reference to an upstream network device per port described by a dsa_chip_data structure. dsa_dev_to_net_device() increments the network device's reference count, so we intentionally call dev_put() to be consistent with the DT-enabled path, until we have a generic notifier based solution. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++ net/dsa/dsa2.c | 102 ++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 90 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2a21fa80f898..b49b2004891e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -45,6 +45,11 @@ struct dsa_chip_data { struct device *host_dev; int sw_addr; + /* + * Reference to network devices + */ + struct device *netdev[DSA_MAX_PORTS]; + /* set to size of eeprom if supported by the switch */ int eeprom_len; @@ -170,6 +175,7 @@ struct dsa_mall_tc_entry { struct dsa_port { struct dsa_switch *ds; unsigned int index; + const char *name; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1c546b6621ee..6f5f0a2ad256 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -78,19 +78,28 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst, kref_put(&dst->refcount, dsa_free_dst); } +/* For platform data configurations, we need to have a valid name argument to + * differentiate a disabled port from an enabled one + */ static bool dsa_port_is_valid(struct dsa_port *port) { - return 
!!port->dn; + return !!(port->dn || port->name); } static bool dsa_port_is_dsa(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "link", 0); + if (port->name && !strcmp(port->name, "dsa")) + return true; + else + return !!of_parse_phandle(port->dn, "link", 0); } static bool dsa_port_is_cpu(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "ethernet", 0); + if (port->name && !strcmp(port->name, "cpu")) + return true; + else + return !!of_parse_phandle(port->dn, "ethernet", 0); } static bool dsa_ds_find_port_dn(struct dsa_switch *ds, @@ -250,10 +259,11 @@ static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, static int dsa_user_port_apply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { - const char *name; + const char *name = port->name; int err; - name = of_get_property(port->dn, "label", NULL); + if (port->dn) + name = of_get_property(port->dn, "label", NULL); if (!name) name = "eth%d"; @@ -444,11 +454,16 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct net_device *ethernet_dev; struct device_node *ethernet; - ethernet = of_parse_phandle(port->dn, "ethernet", 0); - if (!ethernet) - return -EINVAL; + if (port->dn) { + ethernet = of_parse_phandle(port->dn, "ethernet", 0); + if (!ethernet) + return -EINVAL; + ethernet_dev = of_find_net_device_by_node(ethernet); + } else { + ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); + dev_put(ethernet_dev); + } - ethernet_dev = of_find_net_device_by_node(ethernet); if (!ethernet_dev) return -EPROBE_DEFER; @@ -551,6 +566,33 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) return 0; } +static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +{ + bool valid_name_found = false; + unsigned int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + if (!cd->port_names[i]) + continue; + + ds->ports[i].name = cd->port_names[i]; + + /* Initialize enabled_port_mask now for drv->setup() + * to have access to a 
correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + if (!dsa_port_is_cpu(&ds->ports[i])) + ds->enabled_port_mask |= 1 << i; + + valid_name_found = true; + } + + if (!valid_name_found && i == DSA_MAX_PORTS) + return -EINVAL; + + return 0; +} + static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) { int err; @@ -575,6 +617,18 @@ static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) return 0; } +static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) +{ + if (!pd) + return -ENODEV; + + /* We do not support complex trees with dsa_chip_data */ + *tree = 0; + *index = 0; + + return 0; +} + static struct device_node *dsa_get_ports(struct dsa_switch *ds, struct device_node *np) { @@ -591,23 +645,34 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) { + struct dsa_chip_data *pdata = dev->platform_data; struct device_node *np = dev->of_node; struct dsa_switch_tree *dst; struct device_node *ports; u32 tree, index; int i, err; - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; + if (np) { + err = dsa_parse_member_dn(np, &tree, &index); + if (err) + return err; - ports = dsa_get_ports(ds, np); - if (IS_ERR(ports)) - return PTR_ERR(ports); + ports = dsa_get_ports(ds, np); + if (IS_ERR(ports)) + return PTR_ERR(ports); - err = dsa_parse_ports_dn(ports, ds); - if (err) - return err; + err = dsa_parse_ports_dn(ports, ds); + if (err) + return err; + } else { + err = dsa_parse_member(pdata, &tree, &index); + if (err) + return err; + + err = dsa_parse_ports(pdata, ds); + if (err) + return err; + } dst = dsa_get_dst(tree); if (!dst) { @@ -623,6 +688,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) ds->dst = dst; ds->index = index; + ds->cd = pdata; /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) -- cgit v1.2.3 From 
9b8805a325591cf5b6b9df71200de25a2bd721fd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:11 +0200 Subject: sock: add sk_dst_pending_confirm flag Add new sock flag to allow sockets to confirm neighbour. When same struct dst_entry can be used for many different neighbours we can not use it for pending confirmations. As not all call paths lock the socket use full word for the flag. Add sk_dst_confirm as replacement for dst_confirm when called for received packets. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++++++++ net/core/sock.c | 2 ++ 2 files changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 94e65fd70354..85d856b94b4b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -240,6 +240,7 @@ struct sock_common { * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache + * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -393,6 +394,8 @@ struct sock { struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; + __u32 sk_dst_pending_confirm; + /* Note: 32bit hole on 64bit arches */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -1764,6 +1767,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; } } } @@ -1774,6 +1778,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; /* * This can be called while sk is owned by the caller only, * with no state that can be checked in a rcu_dereference_check() cond @@ -1789,6 +1794,7 @@ sk_dst_set(struct sock *sk, struct dst_entry 
*dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -1809,6 +1815,12 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +static inline void sk_dst_confirm(struct sock *sk) +{ + if (!sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 1; +} + bool sk_mc_loop(struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index 8b35debfe454..b74356535559 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; @@ -1519,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; + newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; atomic_set(&newsk->sk_drops, 0); -- cgit v1.2.3 From 4ff0620354f2b39b9fe2a91c22c4de9d1fba0c8e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:12 +0200 Subject: net: add dst_pending_confirm flag to skbuff Add new skbuff flag to allow protocols to confirm neighbour. When same struct dst_entry can be used for many different neighbours we can not use it for pending confirmations. Add sock_confirm_neigh() helper to confirm the neighbour and use it for IPv4, IPv6 and VRF before dst_neigh_output. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 5 ++++- include/linux/skbuff.h | 12 ++++++++++++ include/net/sock.h | 14 ++++++++++++++ net/ipv4/ip_output.c | 5 ++++- net/ipv6/ip6_output.c | 1 + 5 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 264fc1585b3c..630eafdb79e8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -378,6 +378,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return ret; @@ -574,8 +575,10 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (!IS_ERR(neigh)) + if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); + } rcu_read_unlock_bh(); err: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6a78e1892b6..f1adddc1c5ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @dst_pending_confirm: need to confirm neighbour * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -741,6 +742,7 @@ struct sk_buff { __u8 csum_level:2; __u8 csum_bad:1; + __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } +static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 
val) +{ + skb->dst_pending_confirm = val; +} + +static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) +{ + return skb->dst_pending_confirm != 0; +} + static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM diff --git a/include/net/sock.h b/include/net/sock.h index 85d856b94b4b..6f83e78eaa5a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1821,6 +1821,20 @@ static inline void sk_dst_confirm(struct sock *sk) sk->sk_dst_pending_confirm = 1; } +static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) +{ + if (skb_get_dst_pending_confirm(skb)) { + struct sock *sk = skb->sk; + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + if (sk && sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 0; + } +} + bool sk_mc_loop(struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b67719f45953..c9fc32fa3272 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -222,7 +222,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { - int res = dst_neigh_output(dst, neigh, skb); + int res; + + sock_confirm_neigh(skb, neigh); + res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b6a94ff0bbd0..14d99fbf102e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,6 +119,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { + sock_confirm_neigh(skb, neigh); ret = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return ret; -- cgit v1.2.3 From c86a773c78025f5b825bacd7b846f4fa60dc0317 
Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:13 +0200 Subject: sctp: add dst_pending_confirm flag Add new transport flag to allow sockets to confirm neighbour. When same struct dst_entry can be used for many different neighbours we can not use it for pending confirmations. The flag is propagated from transport to every packet. It is reset when cached dst is reset. Reported-by: YueHaibing Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.") Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.") Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 ++---- include/net/sctp/structs.h | 4 ++++ net/sctp/associola.c | 3 +-- net/sctp/output.c | 10 +++++++++- net/sctp/outqueue.c | 2 +- net/sctp/sm_make_chunk.c | 6 ++---- net/sctp/sm_sideeffect.c | 2 +- net/sctp/socket.c | 4 ++-- net/sctp/transport.c | 16 +++++++++++++++- 9 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3cfd365bcfbc..480b65a24aff 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -593,10 +593,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && !dst_check(t->dst, t->dst_cookie)) { - dst_release(t->dst); - t->dst = NULL; - } + if (t->dst && !dst_check(t->dst, t->dst_cookie)) + sctp_transport_dst_release(t); return t->dst; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 231fa9ac50bd..6a685049f67f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -804,6 +804,8 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + __u32 dst_pending_confirm; /* need to confirm neighbour */ + /* Destination */ struct dst_entry *dst; /* Source address. 
*/ @@ -950,6 +952,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); void sctp_transport_immediate_rtx(struct sctp_transport *); +void sctp_transport_dst_release(struct sctp_transport *t); +void sctp_transport_dst_confirm(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e50dc6d7543f..2a6835b4562b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -832,8 +832,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, if (transport->state != SCTP_UNCONFIRMED) transport->state = SCTP_INACTIVE; else { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); ulp_notify = false; } diff --git a/net/sctp/output.c b/net/sctp/output.c index 07ab5062e541..814eac047467 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -546,6 +546,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; + int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -624,7 +625,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - tp->af_specific->sctp_xmit(head, tp); + confirm = tp->dst_pending_confirm; + if (confirm) + skb_set_dst_pending_confirm(head, 1); + /* neighbour should be confirmed on successful transmission or + * positive error + */ + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + tp->dst_pending_confirm = 0; out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 65abe22d8691..db352e5d61f8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1654,7 
+1654,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, if (forward_progress) { if (transport->dst) - dst_confirm(transport->dst); + sctp_transport_dst_confirm(transport); } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ad3445b3408e..c7d3249f88ec 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3333,8 +3333,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; case SCTP_PARAM_DEL_IP: @@ -3348,8 +3347,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; default: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a4552712b882..51abcc90fe75 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -755,7 +755,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * forward progress. 
*/ if (t->dst) - dst_confirm(t->dst); + sctp_transport_dst_confirm(t); /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fc7122c76de..a4609a0be76d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -592,7 +592,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { /* Clear the source and route cache */ - dst_release(trans->dst); + sctp_transport_dst_release(trans); trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); trans->ssthresh = asoc->peer.i.a_rwnd; @@ -843,7 +843,7 @@ skip_mkasconf: */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index baa1ac00d7b5..5b63ceb3bf37 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -240,7 +240,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ if (!transport->dst || transport->dst->obsolete) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); } @@ -672,3 +672,17 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t) sctp_transport_hold(t); } } + +/* Drop dst */ +void sctp_transport_dst_release(struct sctp_transport *t) +{ + dst_release(t->dst); + t->dst = NULL; + t->dst_pending_confirm = 0; +} + +/* Schedule neighbour confirm */ +void sctp_transport_dst_confirm(struct sctp_transport *t) +{ + t->dst_pending_confirm = 1; +} -- cgit v1.2.3 From 63fca65d08632fbec9d9b655f671cf08aa1aeeb8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:15 +0200 Subject: 
net: add confirm_neigh method to dst_ops Add confirm_neigh method to dst_ops and use it from IPv4 and IPv6 to lookup and confirm the neighbour. Its usage via the new helper dst_confirm_neigh() should be restricted to MSG_PROBE users for performance reasons. For XFRM prefer the last tunnel address, if present. With help from Steffen Klassert. Signed-off-by: Julian Anastasov Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/arp.h | 16 ++++++++++++++++ include/net/dst.h | 7 +++++++ include/net/dst_ops.h | 2 ++ include/net/ndisc.h | 17 +++++++++++++++++ net/ipv4/route.c | 19 +++++++++++++++++++ net/ipv6/route.c | 16 ++++++++++++++++ net/xfrm/xfrm_policy.c | 19 +++++++++++++++++++ 7 files changed, 96 insertions(+) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..65619a2de6f4 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 return n; } +static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, diff --git a/include/net/dst.h b/include/net/dst.h index 6835d224d47b..3a3b34b83b00 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -477,6 +477,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst return IS_ERR(n) ? 
NULL : n; } +static inline void dst_confirm_neigh(const struct dst_entry *dst, + const void *daddr) +{ + if (dst->ops->confirm_neigh) + dst->ops->confirm_neigh(dst, daddr); +} + static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 8a2b66d8d78d..c84b3287e38b 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -33,6 +33,8 @@ struct dst_ops { struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); + void (*confirm_neigh)(const struct dst_entry *dst, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d562a2fe4860..8a0214654b6b 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -391,6 +391,23 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons return n; } +static inline void __ipv6_confirm_neigh(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + int ndisc_init(void); int ndisc_late_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b7c231c1aef..cb494a5050f7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -168,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = { .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, + .confirm_neigh = ipv4_confirm_neigh, }; 
#define ECN_OR_COST(class) TC_PRIO_##class @@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + const struct rtable *rt; + + rt = (const struct rtable *)dst; + if (rt->rt_gateway) + pkey = (const __be32 *)&rt->rt_gateway; + else if (!daddr || + (rt->rt_flags & + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + return; + + __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); +} + #define IP_IDENTS_SZ 2048u static atomic_t *ip_idents __read_mostly; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8ffa24cc8899..98b183f1bc8b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -223,6 +223,21 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, return neigh_create(&nd_tbl, daddr, dst->dev); } +static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + struct rt6_info *rt = (struct rt6_info *)dst; + + daddr = choose_neigh_daddr(rt, NULL, daddr); + if (!daddr) + return; + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + return; + if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) + return; + __ipv6_confirm_neigh(dev, daddr); +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .gc = ip6_dst_gc, @@ -239,6 +254,7 @@ static struct dst_ops ip6_dst_ops_template = { .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_neigh_lookup, + .confirm_neigh = ip6_confirm_neigh, }; static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 99ad1af2927f..0a0f63d3cc96 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2856,6 +2856,23 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, return 
dst->path->ops->neigh_lookup(dst, skb, daddr); } +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + const struct dst_entry *path = dst->path; + + for (; dst != path; dst = dst->child) { + const struct xfrm_state *xfrm = dst->xfrm; + + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) + continue; + if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = xfrm->coaddr; + else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) + daddr = &xfrm->id.daddr; + } + path->ops->confirm_neigh(path, daddr); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -2882,6 +2899,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; + if (likely(!dst_ops->confirm_neigh)) + dst_ops->confirm_neigh = xfrm_confirm_neigh; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = xfrm_garbage_collect_deferred; rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); -- cgit v1.2.3 From 51ce8bd4d17a761e1a90a34a1b5c9b762cce7553 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:17 +0200 Subject: net: pending_confirm is not used anymore When same struct dst_entry can be used for many different neighbours we can not use it for pending confirmations. As last step, we can remove the pending_confirm flag. Reported-by: YueHaibing Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.") Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.") Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 14 ++------------ net/core/dst.c | 1 - 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 3a3b34b83b00..84a1043dd6a1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0100 #define DST_METADATA 0x0200 - unsigned short pending_confirm; - short error; /* A non-zero value of dst->obsolete forces by-hand validation @@ -78,6 +76,8 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ + unsigned short __pad3; + #ifdef CONFIG_IP_ROUTE_CLASSID __u32 tclassid; #else @@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - dst->pending_confirm = 1; } static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, @@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, { const struct hh_cache *hh; - if (dst->pending_confirm) { - unsigned long now = jiffies; - - dst->pending_confirm = 0; - /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - } - hh = &n->hh; if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) return neigh_hh_output(hh, skb); diff --git a/net/core/dst.c b/net/core/dst.c index b5cbbe07f786..960e503b5a52 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; - dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); -- cgit v1.2.3 From 85c727b5948344c8d559e2fda8925e9ddd41c29a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 7 Feb 2017 11:37:56 -0200 Subject: sctp: drop __packed from almost all SCTP structures __packed is 
considered harmful as it potentially generates code that doesn't perform well and its usage should be avoided as much as possible. This patch drops __packed from all SCTP structures except one, which is sctp_signed_cookie. In there it's required, as per changelog on commit 9834a2bb4970 ("[SCTP]: Fix sctp_cookie alignment in the packet."). After this patch, no alignment changes neither in x86 or x86_64 and no exceptions were noticed during testing on both archs. Code size for SCTP module also didn't change with this patch. Cc: David Miller Cc: David Laight Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/sctp.h | 80 +++++++++++++++++++++++----------------------- include/net/sctp/structs.h | 2 +- 2 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include/net') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a9e790685af3..2408c6877ca0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -62,7 +62,7 @@ typedef struct sctphdr { __be16 dest; __be32 vtag; __le32 checksum; -} __packed sctp_sctphdr_t; +} sctp_sctphdr_t; static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { @@ -74,7 +74,7 @@ typedef struct sctp_chunkhdr { __u8 type; __u8 flags; __be16 length; -} __packed sctp_chunkhdr_t; +} sctp_chunkhdr_t; /* Section 3.2. Chunk Type Values. 
@@ -165,7 +165,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 }; typedef struct sctp_paramhdr { __be16 type; __be16 length; -} __packed sctp_paramhdr_t; +} sctp_paramhdr_t; typedef enum { @@ -233,12 +233,12 @@ typedef struct sctp_datahdr { __be16 ssn; __be32 ppid; __u8 payload[0]; -} __packed sctp_datahdr_t; +} sctp_datahdr_t; typedef struct sctp_data_chunk { sctp_chunkhdr_t chunk_hdr; sctp_datahdr_t data_hdr; -} __packed sctp_data_chunk_t; +} sctp_data_chunk_t; /* DATA Chuck Specific Flags */ enum { @@ -264,78 +264,78 @@ typedef struct sctp_inithdr { __be16 num_inbound_streams; __be32 initial_tsn; __u8 params[0]; -} __packed sctp_inithdr_t; +} sctp_inithdr_t; typedef struct sctp_init_chunk { sctp_chunkhdr_t chunk_hdr; sctp_inithdr_t init_hdr; -} __packed sctp_init_chunk_t; +} sctp_init_chunk_t; /* Section 3.3.2.1. IPv4 Address Parameter (5) */ typedef struct sctp_ipv4addr_param { sctp_paramhdr_t param_hdr; struct in_addr addr; -} __packed sctp_ipv4addr_param_t; +} sctp_ipv4addr_param_t; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { sctp_paramhdr_t param_hdr; struct in6_addr addr; -} __packed sctp_ipv6addr_param_t; +} sctp_ipv6addr_param_t; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { sctp_paramhdr_t param_hdr; __be32 lifespan_increment; -} __packed sctp_cookie_preserve_param_t; +} sctp_cookie_preserve_param_t; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { sctp_paramhdr_t param_hdr; uint8_t hostname[0]; -} __packed sctp_hostname_param_t; +} sctp_hostname_param_t; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { sctp_paramhdr_t param_hdr; __be16 types[0]; -} __packed sctp_supported_addrs_param_t; +} sctp_supported_addrs_param_t; /* Appendix A. 
ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { sctp_paramhdr_t param_hdr; -} __packed sctp_ecn_capable_param_t; +} sctp_ecn_capable_param_t; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ typedef struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} __packed sctp_adaptation_ind_param_t; +} sctp_adaptation_ind_param_t; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ typedef struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} __packed sctp_supported_ext_param_t; +} sctp_supported_ext_param_t; /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { sctp_paramhdr_t param_hdr; __u8 random_val[0]; -} __packed sctp_random_param_t; +} sctp_random_param_t; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { sctp_paramhdr_t param_hdr; __u8 chunks[0]; -} __packed sctp_chunks_param_t; +} sctp_chunks_param_t; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { sctp_paramhdr_t param_hdr; __be16 hmac_ids[0]; -} __packed sctp_hmac_algo_param_t; +} sctp_hmac_algo_param_t; /* RFC 2960. 
Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP @@ -347,13 +347,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t; typedef struct sctp_cookie_param { sctp_paramhdr_t p; __u8 body[0]; -} __packed sctp_cookie_param_t; +} sctp_cookie_param_t; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { sctp_paramhdr_t param_hdr; sctp_paramhdr_t unrecognized; -} __packed sctp_unrecognized_param_t; +} sctp_unrecognized_param_t; @@ -368,7 +368,7 @@ typedef struct sctp_unrecognized_param { typedef struct sctp_gap_ack_block { __be16 start; __be16 end; -} __packed sctp_gap_ack_block_t; +} sctp_gap_ack_block_t; typedef __be32 sctp_dup_tsn_t; @@ -383,12 +383,12 @@ typedef struct sctp_sackhdr { __be16 num_gap_ack_blocks; __be16 num_dup_tsns; sctp_sack_variable_t variable[0]; -} __packed sctp_sackhdr_t; +} sctp_sackhdr_t; typedef struct sctp_sack_chunk { sctp_chunkhdr_t chunk_hdr; sctp_sackhdr_t sack_hdr; -} __packed sctp_sack_chunk_t; +} sctp_sack_chunk_t; /* RFC 2960. 
Section 3.3.5 Heartbeat Request (HEARTBEAT) (4): @@ -400,12 +400,12 @@ typedef struct sctp_sack_chunk { typedef struct sctp_heartbeathdr { sctp_paramhdr_t info; -} __packed sctp_heartbeathdr_t; +} sctp_heartbeathdr_t; typedef struct sctp_heartbeat_chunk { sctp_chunkhdr_t chunk_hdr; sctp_heartbeathdr_t hb_hdr; -} __packed sctp_heartbeat_chunk_t; +} sctp_heartbeat_chunk_t; /* For the abort and shutdown ACK we must carry the init tag in the @@ -414,7 +414,7 @@ typedef struct sctp_heartbeat_chunk { */ typedef struct sctp_abort_chunk { sctp_chunkhdr_t uh; -} __packed sctp_abort_chunk_t; +} sctp_abort_chunk_t; /* For the graceful shutdown we must carry the tag (in common header) @@ -422,12 +422,12 @@ typedef struct sctp_abort_chunk { */ typedef struct sctp_shutdownhdr { __be32 cum_tsn_ack; -} __packed sctp_shutdownhdr_t; +} sctp_shutdownhdr_t; struct sctp_shutdown_chunk_t { sctp_chunkhdr_t chunk_hdr; sctp_shutdownhdr_t shutdown_hdr; -} __packed; +}; /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ @@ -435,12 +435,12 @@ typedef struct sctp_errhdr { __be16 cause; __be16 length; __u8 variable[0]; -} __packed sctp_errhdr_t; +} sctp_errhdr_t; typedef struct sctp_operr_chunk { sctp_chunkhdr_t chunk_hdr; sctp_errhdr_t err_hdr; -} __packed sctp_operr_chunk_t; +} sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error * @@ -530,7 +530,7 @@ typedef struct sctp_ecnehdr { typedef struct sctp_ecne_chunk { sctp_chunkhdr_t chunk_hdr; sctp_ecnehdr_t ence_hdr; -} __packed sctp_ecne_chunk_t; +} sctp_ecne_chunk_t; /* RFC 2960. Appendix A. Explicit Congestion Notification. 
* Congestion Window Reduced (CWR) (13) @@ -542,7 +542,7 @@ typedef struct sctp_cwrhdr { typedef struct sctp_cwr_chunk { sctp_chunkhdr_t chunk_hdr; sctp_cwrhdr_t cwr_hdr; -} __packed sctp_cwr_chunk_t; +} sctp_cwr_chunk_t; /* PR-SCTP * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN) @@ -593,17 +593,17 @@ typedef struct sctp_cwr_chunk { struct sctp_fwdtsn_skip { __be16 stream; __be16 ssn; -} __packed; +}; struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; struct sctp_fwdtsn_skip skip[0]; -} __packed; +}; struct sctp_fwdtsn_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_fwdtsn_hdr fwdtsn_hdr; -} __packed; +}; /* ADDIP @@ -641,17 +641,17 @@ struct sctp_fwdtsn_chunk { typedef struct sctp_addip_param { sctp_paramhdr_t param_hdr; __be32 crr_id; -} __packed sctp_addip_param_t; +} sctp_addip_param_t; typedef struct sctp_addiphdr { __be32 serial; __u8 params[0]; -} __packed sctp_addiphdr_t; +} sctp_addiphdr_t; typedef struct sctp_addip_chunk { sctp_chunkhdr_t chunk_hdr; sctp_addiphdr_t addip_hdr; -} __packed sctp_addip_chunk_t; +} sctp_addip_chunk_t; /* AUTH * Section 4.1 Authentication Chunk (AUTH) @@ -706,12 +706,12 @@ typedef struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; __u8 hmac[0]; -} __packed sctp_authhdr_t; +} sctp_authhdr_t; typedef struct sctp_auth_chunk { sctp_chunkhdr_t chunk_hdr; sctp_authhdr_t auth_hdr; -} __packed sctp_auth_chunk_t; +} sctp_auth_chunk_t; struct sctp_infox { struct sctp_info *sctpinfo; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6a685049f67f..387c802bf248 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -374,7 +374,7 @@ typedef struct sctp_sender_hb_info { union sctp_addr daddr; unsigned long sent_at; __u64 hb_nonce; -} __packed sctp_sender_hb_info_t; +} sctp_sender_hb_info_t; struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); -- cgit v1.2.3 From 66cd794e3c30b8af3b6befe42a378557efb3114a Mon Sep 17 
00:00:00 2001 From: Johannes Berg Date: Tue, 7 Feb 2017 22:40:44 +0200 Subject: nl80211: add HT/VHT capabilities to AP parameters For the benefit of drivers that rebuild IEs in firmware, parse the IEs for HT/VHT capabilities and the respective membership selector in the (extended) supported rates. This avoids duplicating the same code into all drivers that need this information. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- include/net/cfg80211.h | 8 ++++++++ net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 02768de209d6..0dd9498c694f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1043,8 +1043,9 @@ struct ieee80211_mgmt { } u; } __packed __aligned(2); -/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ +/* Supported rates membership selectors */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 +#define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 870549480e9b..5cfd2806a078 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -748,6 +748,10 @@ struct cfg80211_bitrate_mask { * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. 
* @beacon_rate: bitrate to be used for beacons + * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) + * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) + * @ht_required: stations must support HT + * @vht_required: stations must support VHT */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -768,6 +772,10 @@ struct cfg80211_ap_settings { const struct cfg80211_acl_data *acl; bool pbss; struct cfg80211_bitrate_mask beacon_rate; + + const struct ieee80211_ht_cap *ht_cap; + const struct ieee80211_vht_cap *vht_cap; + bool ht_required, vht_required; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a7b4318f735d..c853746f47bc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3,7 +3,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015-2016 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH */ #include @@ -3743,6 +3743,49 @@ static int nl80211_parse_beacon(struct nlattr *attrs[], return 0; } +static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, + const u8 *rates) +{ + int i; + + if (!rates) + return; + + for (i = 0; i < rates[1]; i++) { + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + params->ht_required = true; + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) + params->vht_required = true; + } +} + +/* + * Since the nl80211 API didn't include, from the beginning, attributes about + * HT/VHT requirements/capabilities, we parse them out of the IEs for the + * benefit of drivers that rebuild IEs in the firmware. 
+ */ +static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) +{ + const struct cfg80211_beacon_data *bcn = &params->beacon; + size_t ies_len = bcn->beacon_ies_len; + const u8 *ies = bcn->beacon_ies; + const u8 *rates; + const u8 *cap; + + rates = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies, ies_len); + nl80211_check_ap_rate_selectors(params, rates); + + rates = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); + nl80211_check_ap_rate_selectors(params, rates); + + cap = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->ht_cap)) + params->ht_cap = (void *)(cap + 2); + cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->vht_cap)) + params->vht_cap = (void *)(cap + 2); +} + static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct cfg80211_ap_settings *params) { @@ -3971,6 +4014,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(params.acl); } + nl80211_calculate_ap_params(&params); + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, &params); if (!err) { -- cgit v1.2.3 From 769f07d8f0fb6a68a0eda6308bbe890bff894fd7 Mon Sep 17 00:00:00 2001 From: Andrzej Zaborowski Date: Wed, 25 Jan 2017 12:43:40 +0100 Subject: mac80211: Pass new RSSI level in CQM RSSI notification Extend ieee80211_cqm_rssi_notify with a rssi_level parameter so that this information can be passed to netlink clients in the next patch, if available. Most drivers will have this value at hand. wl1251 receives events from the firmware that only tell it whether latest measurement is above or below threshold so we don't pass any value at this time (parameter is 0).
Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 2 ++ drivers/net/wireless/rsi/rsi_91x_mac80211.c | 2 +- drivers/net/wireless/st/cw1200/sta.c | 2 +- drivers/net/wireless/ti/wl1251/event.c | 4 ++-- drivers/net/wireless/ti/wlcore/event.c | 3 ++- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 7 ++++--- net/mac80211/trace.h | 11 +++++++---- 8 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 0e60e38b2acf..e06a2e323cc8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -571,6 +571,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, ieee80211_cqm_rssi_notify( vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + sig, GFP_KERNEL); } else if (sig > thold && (last_event == 0 || sig > last_event + hyst)) { @@ -580,6 +581,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, ieee80211_cqm_rssi_notify( vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + sig, GFP_KERNEL); } } diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index dadaa73ab49d..e3216473aecb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -877,7 +877,7 @@ static void rsi_perform_cqm(struct rsi_common *common, common->cqm_info.last_cqm_event_rssi = rssi; rsi_dbg(INFO_ZONE, "CQM: Notifying event: %d\n", event); - ieee80211_cqm_rssi_notify(adapter->vifs[0], event, GFP_KERNEL); + ieee80211_cqm_rssi_notify(adapter->vifs[0], event, rssi, GFP_KERNEL); return; } diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index daf06a4f842e..a52224836a2b 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -1019,7 +1019,7 @@ void cw1200_event_handler(struct work_struct *work) 
NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW : NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH; pr_debug("[CQM] RSSI event: %d.\n", rcpi_rssi); - ieee80211_cqm_rssi_notify(priv->vif, cqm_evt, + ieee80211_cqm_rssi_notify(priv->vif, cqm_evt, rcpi_rssi, GFP_KERNEL); break; } diff --git a/drivers/net/wireless/ti/wl1251/event.c b/drivers/net/wireless/ti/wl1251/event.c index d0593bc1f1a9..f5acd24d0e2b 100644 --- a/drivers/net/wireless/ti/wl1251/event.c +++ b/drivers/net/wireless/ti/wl1251/event.c @@ -150,7 +150,7 @@ static int wl1251_event_process(struct wl1251 *wl, struct event_mailbox *mbox) "ROAMING_TRIGGER_LOW_RSSI_EVENT"); ieee80211_cqm_rssi_notify(wl->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, - GFP_KERNEL); + 0, GFP_KERNEL); } if (vector & ROAMING_TRIGGER_REGAINED_RSSI_EVENT_ID) { @@ -158,7 +158,7 @@ static int wl1251_event_process(struct wl1251 *wl, struct event_mailbox *mbox) "ROAMING_TRIGGER_REGAINED_RSSI_EVENT"); ieee80211_cqm_rssi_notify(wl->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, - GFP_KERNEL); + 0, GFP_KERNEL); } } diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c index 4b59f67724de..f2e90d223d94 100644 --- a/drivers/net/wireless/ti/wlcore/event.c +++ b/drivers/net/wireless/ti/wlcore/event.c @@ -129,7 +129,8 @@ void wlcore_event_rssi_trigger(struct wl1271 *wl, s8 *metric_arr) vif = wl12xx_wlvif_to_vif(wlvif); if (event != wlvif->last_rssi_event) - ieee80211_cqm_rssi_notify(vif, event, GFP_KERNEL); + ieee80211_cqm_rssi_notify(vif, event, metric, + GFP_KERNEL); wlvif->last_rssi_event = event; } } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33624ffbd5a5..b9a08cd1d97d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5278,6 +5278,7 @@ void ieee80211_resume_disconnect(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. 
* @rssi_event: the RSSI trigger event type + * @rssi_level: new RSSI level value or 0 if not available * @gfp: context flags * * When the %IEEE80211_VIF_SUPPORTS_CQM_RSSI is set, and a connection quality @@ -5286,6 +5287,7 @@ void ieee80211_resume_disconnect(struct ieee80211_vif *vif); */ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level, gfp_t gfp); /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a6344518674..ee423688c92e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3419,14 +3419,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_cqm_rssi_notify( &sdata->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, - GFP_KERNEL); + sig, GFP_KERNEL); } else if (sig > thold && (last_event == 0 || sig > last_event + hyst)) { ifmgd->last_cqm_event_signal = sig; ieee80211_cqm_rssi_notify( &sdata->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, - GFP_KERNEL); + sig, GFP_KERNEL); } } @@ -5041,11 +5041,12 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - trace_api_cqm_rssi_notify(sdata, rssi_event); + trace_api_cqm_rssi_notify(sdata, rssi_event, rssi_level); cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 92a47afaa989..f78d9f4f8711 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1996,23 +1996,26 @@ TRACE_EVENT(api_connection_loss, TRACE_EVENT(api_cqm_rssi_notify, TP_PROTO(struct ieee80211_sub_if_data *sdata, - enum nl80211_cqm_rssi_threshold_event rssi_event), + enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level), - TP_ARGS(sdata, rssi_event), + TP_ARGS(sdata, rssi_event, rssi_level), TP_STRUCT__entry( VIF_ENTRY __field(u32, rssi_event) 
+ __field(s32, rssi_level) ), TP_fast_assign( VIF_ASSIGN; __entry->rssi_event = rssi_event; + __entry->rssi_level = rssi_level; ), TP_printk( - VIF_PR_FMT " event:%d", - VIF_PR_ARG, __entry->rssi_event + VIF_PR_FMT " event:%d rssi:%d", + VIF_PR_ARG, __entry->rssi_event, __entry->rssi_level ) ); -- cgit v1.2.3 From bee427b86217b78a0a5fc85575cc155e4c32bbf9 Mon Sep 17 00:00:00 2001 From: Andrzej Zaborowski Date: Wed, 25 Jan 2017 12:43:41 +0100 Subject: cfg80211: Pass new RSSI level in CQM RSSI notification Update the drivers to pass the RSSI level as a cfg80211_cqm_rssi_notify parameter and pass this value to userspace in a new nl80211 attribute. This helps both userspace and also helps in the implementation of the multiple RSSI thresholds CQM mechanism. Note for marvell/mwifiex I pass 0 for the RSSI value because the new RSSI value is not available to the driver at the time of the cfg80211_cqm_rssi_notify call, but the driver queries the new value immediately after that, so it is actually available just a moment later if we wanted to defer calling cfg80211_cqm_rssi_notify until that moment. Without this, the new cfg80211 code (patch 3) will call .get_station which will send a duplicate HostCmd_CMD_RSSI_INFO command to the hardware.
Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/sta_event.c | 4 ++-- drivers/net/wireless/rndis_wlan.c | 2 +- include/net/cfg80211.h | 3 ++- include/uapi/linux/nl80211.h | 3 +++ net/mac80211/mlme.c | 2 +- net/wireless/nl80211.c | 9 +++++++-- net/wireless/trace.h | 11 +++++++---- 7 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 9df0c4dc06ed..5cc3aa7c31cd 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -824,7 +824,7 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_RSSI_LOW: cfg80211_cqm_rssi_notify(priv->netdev, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, - GFP_KERNEL); + 0, GFP_KERNEL); mwifiex_send_cmd(priv, HostCmd_CMD_RSSI_INFO, HostCmd_ACT_GEN_GET, 0, NULL, false); priv->subsc_evt_rssi_state = RSSI_LOW_RECVD; @@ -839,7 +839,7 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_RSSI_HIGH: cfg80211_cqm_rssi_notify(priv->netdev, NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, - GFP_KERNEL); + 0, GFP_KERNEL); mwifiex_send_cmd(priv, HostCmd_CMD_RSSI_INFO, HostCmd_ACT_GEN_GET, 0, NULL, false); priv->subsc_evt_rssi_state = RSSI_HIGH_RECVD; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 603c90470225..785334f7a538 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -3187,7 +3187,7 @@ static void rndis_do_cqm(struct usbnet *usbdev, s32 rssi) return; priv->last_cqm_event_rssi = rssi; - cfg80211_cqm_rssi_notify(usbdev->net, event, GFP_KERNEL); + cfg80211_cqm_rssi_notify(usbdev->net, event, rssi, GFP_KERNEL); } #define DEVICE_POLLER_JIFFIES (HZ) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5cfd2806a078..a2c18b53e053 100644 --- a/include/net/cfg80211.h +++ 
b/include/net/cfg80211.h @@ -5390,6 +5390,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event * @dev: network device * @rssi_event: the triggered RSSI event + * @rssi_level: new RSSI level value or 0 if not available * @gfp: context flags * * This function is called when a configured connection quality monitoring @@ -5397,7 +5398,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, */ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp); + s32 rssi_level, gfp_t gfp); /** * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d6c62ee9bd1d..cd547b864595 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3952,6 +3952,8 @@ enum nl80211_ps_state { * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon * loss event + * @NL80211_ATTR_CQM_RSSI_LEVEL: the RSSI value in dBm that triggered the + * RSSI threshold event. 
* @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -3965,6 +3967,7 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_TXE_PKTS, NL80211_ATTR_CQM_TXE_INTVL, NL80211_ATTR_CQM_BEACON_LOSS_EVENT, + NL80211_ATTR_CQM_RSSI_LEVEL, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ee423688c92e..6e90301154d5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5048,7 +5048,7 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, trace_api_cqm_rssi_notify(sdata, rssi_event, rssi_level); - cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); + cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, rssi_level, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b455898df63c..9d738f75bd4e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9474,6 +9474,7 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = { [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_LEVEL] = { .type = NLA_S32 }, }; static int nl80211_set_cqm_txe(struct genl_info *info, @@ -13959,11 +13960,11 @@ static void cfg80211_send_cqm(struct sk_buff *msg, gfp_t gfp) void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) + s32 rssi_level, gfp_t gfp) { struct sk_buff *msg; - trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); if (WARN_ON(rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW && rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) @@ -13977,6 +13978,10 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event)) goto nla_put_failure; + if (rssi_level && nla_put_s32(msg, NL80211_ATTR_CQM_RSSI_LEVEL, + rssi_level)) + goto nla_put_failure; + cfg80211_send_cqm(msg, 
gfp); return; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ea1b47e04fa4..2419c390f150 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2490,18 +2490,21 @@ TRACE_EVENT(cfg80211_mgmt_tx_status, TRACE_EVENT(cfg80211_cqm_rssi_notify, TP_PROTO(struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event), - TP_ARGS(netdev, rssi_event), + enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level), + TP_ARGS(netdev, rssi_event, rssi_level), TP_STRUCT__entry( NETDEV_ENTRY __field(enum nl80211_cqm_rssi_threshold_event, rssi_event) + __field(s32, rssi_level) ), TP_fast_assign( NETDEV_ASSIGN; __entry->rssi_event = rssi_event; + __entry->rssi_level = rssi_level; ), - TP_printk(NETDEV_PR_FMT ", rssi event: %d", - NETDEV_PR_ARG, __entry->rssi_event) + TP_printk(NETDEV_PR_FMT ", rssi event: %d, level: %d", + NETDEV_PR_ARG, __entry->rssi_event, __entry->rssi_level) ); TRACE_EVENT(cfg80211_reg_can_beacon, -- cgit v1.2.3 From 5cb82a38c6b5152b1deaba0c1596ce63222a4710 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Jan 2017 18:30:07 +0100 Subject: netfilter: nf_tables: pass netns to set->ops->remove() This new parameter is required by the new bitmap set type that comes in a follow up patch. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- net/netfilter/nf_tables_api.c | 6 +++--- net/netfilter/nft_set_hash.c | 3 ++- net/netfilter/nft_set_rbtree.c | 3 ++- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7dfdb517f0be..a721bcb1210c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -298,7 +298,8 @@ struct nft_set_ops { bool (*deactivate_one)(const struct net *net, const struct nft_set *set, void *priv); - void (*remove)(const struct nft_set *set, + void (*remove)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 57eeae63f597..3643ce345b59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3752,7 +3752,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return 0; err6: - set->ops->remove(set, &elem); + set->ops->remove(ctx->net, set, &elem); err5: kfree(trans); err4: @@ -4804,7 +4804,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; break; @@ -4925,7 +4925,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; case NFT_MSG_DELSETELEM: diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e36069fb76ae..bb157bd47fe8 100644 --- a/net/netfilter/nft_set_hash.c +++ 
b/net/netfilter/nft_set_hash.c @@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net, return he; } -static void nft_hash_remove(const struct nft_set *set, +static void nft_hash_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f06f55ee516d..9fbd70da1633 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } -static void nft_rbtree_remove(const struct nft_set *set, +static void nft_rbtree_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); -- cgit v1.2.3 From 1ba1c41408df8a9d2f8b9b67e4c9e6f59b29d8ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Jan 2017 18:30:09 +0100 Subject: netfilter: nf_tables: rename deactivate_one() to flush() Although semantics are similar to deactivate() with no implicit element lookup, this is only called from the set flush path, so better rename this to flush(). 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 ++++---- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_set_hash.c | 8 ++++---- net/netfilter/nft_set_rbtree.c | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a721bcb1210c..ab155644d489 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -260,7 +260,7 @@ struct nft_expr; * @insert: insert new element into set * @activate: activate new element in the next generation * @deactivate: lookup for element and deactivate it in the next generation - * @deactivate_one: deactivate element in the next generation + * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -295,9 +295,9 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*deactivate_one)(const struct net *net, - const struct nft_set *set, - void *priv); + bool (*flush)(const struct net *net, + const struct nft_set *set, + void *priv); void (*remove)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 790ffed82930..c09b11eb36fc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3898,7 +3898,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + if (!set->ops->flush(ctx->net, set, elem->priv)) { err = -ENOENT; goto err1; } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index bb157bd47fe8..2f10ac3b1b10 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,8 +167,8 
@@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_clear_busy(&he->ext); } -static bool nft_hash_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_hash_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; @@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL && - !nft_hash_deactivate_one(net, set, he)) + !nft_hash_flush(net, set, he)) he = NULL; rcu_read_unlock(); @@ -398,7 +398,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, - .deactivate_one = nft_hash_deactivate_one, + .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 9fbd70da1633..81b8a4c2c061 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -172,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_rbtree_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; @@ -214,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; continue; } - nft_rbtree_deactivate_one(net, set, rbe); + nft_rbtree_flush(net, set, rbe); return rbe; } } @@ -305,7 +305,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, - .deactivate_one = nft_rbtree_deactivate_one, + .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, 
.lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, -- cgit v1.2.3 From 1f48ff6c5393aa7fe290faf5d633164f105b0aa7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Jan 2017 18:30:10 +0100 Subject: netfilter: nf_tables: add flush field to struct nft_set_iter This provides context to walk callback iterator, thus, we know if the walk happens from the set flush path. This is required by the new bitmap set type coming in a follow up patch which has no real struct nft_set_ext, so it has to allocate it based on the two bit compact element representation. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ab155644d489..5830f594842e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -203,6 +203,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { u8 genmask; + bool flush; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c09b11eb36fc..7ae810b03462 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3121,6 +3121,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3374,6 +3375,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; + args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3939,6 +3941,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, + .flush = true, }; set->ops->walk(&ctx, 
set, &iter); @@ -5089,6 +5092,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) -- cgit v1.2.3 From 55af753cd9fda9c5300f5318253b08bd15fb412e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Jan 2017 18:30:11 +0100 Subject: netfilter: nf_tables: rename struct nft_set_estimate class field Use lookup as field name instead, to prepare the introduction of the memory class in a follow up patch. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nf_tables_api.c | 12 ++++++------ net/netfilter/nft_set_hash.c | 2 +- net/netfilter/nft_set_rbtree.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5830f594842e..d76ac2f80a40 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -244,11 +244,11 @@ enum nft_set_class { * characteristics * * @size: required memory - * @class: lookup performance class + * @lookup: lookup performance class */ struct nft_set_estimate { unsigned int size; - enum nft_set_class class; + enum nft_set_class lookup; }; struct nft_set_ext; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7ae810b03462..fa7cd1679079 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2401,9 +2401,9 @@ nft_select_set_ops(const struct nlattr * const nla[], features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; } - bops = NULL; - best.size = ~0; - best.class = ~0; + bops = NULL; + best.size = ~0; + best.lookup = ~0; list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) @@ -2413,15 +2413,15 @@ nft_select_set_ops(const struct nlattr * const nla[], switch (policy) { case NFT_SET_POL_PERFORMANCE: - if 
(est.class < best.class) + if (est.lookup < best.lookup) break; - if (est.class == best.class && est.size < best.size) + if (est.lookup == best.lookup && est.size < best.size) break; continue; case NFT_SET_POL_MEMORY: if (est.size < best.size) break; - if (est.size == best.size && est.class < best.class) + if (est.size == best.size && est.lookup < best.lookup) break; continue; default: diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 2f10ac3b1b10..e58e7f02138b 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -384,7 +384,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, est->size = esize + 2 * sizeof(struct nft_hash_elem *); } - est->class = NFT_SET_CLASS_O_1; + est->lookup = NFT_SET_CLASS_O_1; return true; } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 81b8a4c2c061..2b6ea10c4bbd 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -291,7 +291,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, else est->size = nsize; - est->class = NFT_SET_CLASS_O_LOG_N; + est->lookup = NFT_SET_CLASS_O_LOG_N; return true; } -- cgit v1.2.3 From 0b5a78749260560f41e3b7c1f60f2c7dd9aff4f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Jan 2017 18:30:12 +0100 Subject: netfilter: nf_tables: add space notation to sets The space notation allows us to classify the set backend implementation based on the amount of required memory. This provides an order of the set representation scalability in terms of memory. The size field is still left in place so use this if the userspace provides no explicit number of elements, so we cannot calculate the real memory that this set needs. This also helps us break ties in the set backend selection routine, eg. two backend implementations provide the same performance. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 22 +++++++++++++++++----- net/netfilter/nft_set_hash.c | 1 + net/netfilter/nft_set_rbtree.c | 1 + 4 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d76ac2f80a40..21ce50e6d0c5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -245,10 +245,12 @@ enum nft_set_class { * * @size: required memory * @lookup: lookup performance class + * @space: memory class */ struct nft_set_estimate { unsigned int size; enum nft_set_class lookup; + enum nft_set_class space; }; struct nft_set_ext; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fa7cd1679079..cb6ae46f6c48 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2404,6 +2404,7 @@ nft_select_set_ops(const struct nlattr * const nla[], bops = NULL; best.size = ~0; best.lookup = ~0; + best.space = ~0; list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) @@ -2415,14 +2416,25 @@ nft_select_set_ops(const struct nlattr * const nla[], case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; - if (est.lookup == best.lookup && est.size < best.size) - break; + if (est.lookup == best.lookup) { + if (!desc->size) { + if (est.space < best.space) + break; + } else if (est.size < best.size) { + break; + } + } continue; case NFT_SET_POL_MEMORY: - if (est.size < best.size) - break; - if (est.size == best.size && est.lookup < best.lookup) + if (!desc->size) { + if (est.space < best.space) + break; + if (est.space == best.space && + est.lookup < best.lookup) + break; + } else if (est.size < best.size) { break; + } continue; default: break; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e58e7f02138b..6938bc890f31 100644 --- 
a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -385,6 +385,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, } est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_N; return true; } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2b6ea10c4bbd..3387ed7dd231 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -292,6 +292,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, est->size = nsize; est->lookup = NFT_SET_CLASS_O_LOG_N; + est->space = NFT_SET_CLASS_O_N; return true; } -- cgit v1.2.3 From 97e219b7c1f75b14b29abe28ad53e8709e8d15e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Feb 2017 15:37:15 -0800 Subject: gro_cells: move to net/core/gro_cells.c We have many gro cells users, so lets move the code to avoid duplication. This creates a CONFIG_GRO_CELLS option. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/Kconfig | 3 ++ include/net/gro_cells.h | 86 +++------------------------------------------ net/Kconfig | 4 +++ net/core/Makefile | 1 + net/core/gro_cells.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/Kconfig | 1 + net/ipv6/Kconfig | 1 + net/xfrm/Kconfig | 1 + 8 files changed, 107 insertions(+), 82 deletions(-) create mode 100644 net/core/gro_cells.c (limited to 'include/net') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 95c32f2d7601..a993cbeb9e0c 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -170,6 +170,7 @@ config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" depends on INET select NET_UDP_TUNNEL + select GRO_CELLS ---help--- This allows one to create vxlan virtual interfaces that provide Layer 2 Networks over Layer 3 Networks. 
VXLAN is often used @@ -184,6 +185,7 @@ config GENEVE tristate "Generic Network Virtualization Encapsulation" depends on INET && NET_UDP_TUNNEL select NET_IP_TUNNEL + select GRO_CELLS ---help--- This allows one to create geneve virtual interfaces that provide Layer 2 Networks over Layer 3 Networks. GENEVE is often used @@ -216,6 +218,7 @@ config MACSEC select CRYPTO select CRYPTO_AES select CRYPTO_GCM + select GRO_CELLS ---help--- MACsec is an encryption standard for Ethernet. diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index 2a1abbf8da74..fcaf8f479130 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -5,92 +5,14 @@ #include #include -struct gro_cell { - struct sk_buff_head napi_skbs; - struct napi_struct napi; -}; +struct gro_cell; struct gro_cells { struct gro_cell __percpu *cells; }; -static inline int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) -{ - struct gro_cell *cell; - struct net_device *dev = skb->dev; - - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) - return netif_rx(skb); - - cell = this_cpu_ptr(gcells->cells); - - if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - __skb_queue_tail(&cell->napi_skbs, skb); - if (skb_queue_len(&cell->napi_skbs) == 1) - napi_schedule(&cell->napi); - return NET_RX_SUCCESS; -} - -/* called under BH context */ -static inline int gro_cell_poll(struct napi_struct *napi, int budget) -{ - struct gro_cell *cell = container_of(napi, struct gro_cell, napi); - struct sk_buff *skb; - int work_done = 0; - - while (work_done < budget) { - skb = __skb_dequeue(&cell->napi_skbs); - if (!skb) - break; - napi_gro_receive(napi, skb); - work_done++; - } - - if (work_done < budget) - napi_complete_done(napi, work_done); - return work_done; -} - -static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) -{ - int i; - - gcells->cells = 
alloc_percpu(struct gro_cell); - if (!gcells->cells) - return -ENOMEM; - - for_each_possible_cpu(i) { - struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); - - __skb_queue_head_init(&cell->napi_skbs); - - set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); - - netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); - napi_enable(&cell->napi); - } - return 0; -} - -static inline void gro_cells_destroy(struct gro_cells *gcells) -{ - int i; - - if (!gcells->cells) - return; - for_each_possible_cpu(i) { - struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); - - netif_napi_del(&cell->napi); - __skb_queue_purge(&cell->napi_skbs); - } - free_percpu(gcells->cells); - gcells->cells = NULL; -} +int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb); +int gro_cells_init(struct gro_cells *gcells, struct net_device *dev); +void gro_cells_destroy(struct gro_cells *gcells); #endif diff --git a/net/Kconfig b/net/Kconfig index 2f2842d2d3ed..f19c0c3b9589 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -413,6 +413,10 @@ config DST_CACHE bool default n +config GRO_CELLS + bool + default n + config NET_DEVLINK tristate "Network physical/parent device Netlink interface" help diff --git a/net/core/Makefile b/net/core/Makefile index f6761b6e3b29..79f9479e9658 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o +obj-$(CONFIG_GRO_CELLS) += gro_cells.o diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c new file mode 100644 index 000000000000..c98bbfbd26b8 --- /dev/null +++ b/net/core/gro_cells.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +struct gro_cell { + struct sk_buff_head napi_skbs; + struct napi_struct napi; +}; + +int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct gro_cell *cell; + + if (!gcells->cells || 
skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + return netif_rx(skb); + + cell = this_cpu_ptr(gcells->cells); + + if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + __skb_queue_tail(&cell->napi_skbs, skb); + if (skb_queue_len(&cell->napi_skbs) == 1) + napi_schedule(&cell->napi); + return NET_RX_SUCCESS; +} +EXPORT_SYMBOL(gro_cells_receive); + +/* called under BH context */ +static int gro_cell_poll(struct napi_struct *napi, int budget) +{ + struct gro_cell *cell = container_of(napi, struct gro_cell, napi); + struct sk_buff *skb; + int work_done = 0; + + while (work_done < budget) { + skb = __skb_dequeue(&cell->napi_skbs); + if (!skb) + break; + napi_gro_receive(napi, skb); + work_done++; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + return work_done; +} + +int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) +{ + int i; + + gcells->cells = alloc_percpu(struct gro_cell); + if (!gcells->cells) + return -ENOMEM; + + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + + netif_napi_add(dev, &cell->napi, gro_cell_poll, + NAPI_POLL_WEIGHT); + napi_enable(&cell->napi); + } + return 0; +} +EXPORT_SYMBOL(gro_cells_init); + +void gro_cells_destroy(struct gro_cells *gcells) +{ + int i; + + if (!gcells->cells) + return; + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + netif_napi_del(&cell->napi); + __skb_queue_purge(&cell->napi_skbs); + } + free_percpu(gcells->cells); + gcells->cells = NULL; +} +EXPORT_SYMBOL(gro_cells_destroy); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 6e7baaf814c6..e30f9caddae8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -187,6 +187,7 @@ config NET_IPGRE_DEMUX config NET_IP_TUNNEL tristate select DST_CACHE + select 
GRO_CELLS default n config NET_IPGRE diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ec1267e2bd1f..3c7c76b2a7ba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -208,6 +208,7 @@ config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL select DST_CACHE + select GRO_CELLS ---help--- Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index bda1a13628a8..c06d3997c6e7 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -4,6 +4,7 @@ config XFRM bool depends on NET + select GRO_CELLS config XFRM_ALGO tristate -- cgit v1.2.3 From 982acb97560c8118c2109504a22b0d78a580547d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 8 Feb 2017 11:16:39 +0100 Subject: ipv4: fib: Notify about nexthop status changes When a multipath route is hit the kernel doesn't consider nexthops that are DEAD or LINKDOWN when IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN is set. Devices that offload multipath routes need to be made aware of nexthop status changes. Otherwise, the device will keep forwarding packets to non-functional nexthops. Add the FIB_EVENT_NH_{ADD,DEL} events to the fib notification chain, which notify capable devices when they should add or delete a nexthop from their tables. Cc: Roopa Prabhu Cc: David Ahern Cc: Andy Gospodarek Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Andy Gospodarek Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 7 +++++++ net/ipv4/fib_semantics.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 57c2a863d0b2..45a184eaff2b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -214,11 +214,18 @@ struct fib_entry_notifier_info { u32 nlflags; }; +struct fib_nh_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_nh *fib_nh; +}; + enum fib_event_type { FIB_EVENT_ENTRY_ADD, FIB_EVENT_ENTRY_DEL, FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, }; int register_fib_notifier(struct notifier_block *nb, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6306a67880e8..317026a39cfa 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1355,6 +1355,36 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) return ret; } +static int call_fib_nh_notifiers(struct fib_nh *fib_nh, + enum fib_event_type event_type) +{ + struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); + struct fib_nh_notifier_info info = { + .fib_nh = fib_nh, + }; + + switch (event_type) { + case FIB_EVENT_NH_ADD: + if (fib_nh->nh_flags & RTNH_F_DEAD) + break; + if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) + break; + return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); + case FIB_EVENT_NH_DEL: + if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) || + (fib_nh->nh_flags & RTNH_F_DEAD)) + return call_fib_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); + default: + break; + } + + return NOTIFY_DONE; +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host @@ -1396,6 +1426,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, 
bool force) nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; break; } + call_fib_nh_notifiers(nexthop_nh, + FIB_EVENT_NH_DEL); dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1550,6 +1582,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) continue; alive++; nexthop_nh->nh_flags &= ~nh_flags; + call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) if (alive > 0) { -- cgit v1.2.3 From 8585989d146c61dd073d2135c5bb11d0f979d576 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 8 Feb 2017 15:00:34 +0200 Subject: cfg80211: fix NAN bands definition The nl80211_nan_dual_band_conf enumeration doesn't make much sense. The default value is assigned to a bit, which makes it weird if the default bit and other bits are set at the same time. To improve this, get rid of NL80211_NAN_BAND_DEFAULT and add a wiphy configuration to let the drivers define which bands are supported. This is exposed to the userspace, which then can make a decision on which band(s) to use. Additionally, rename all "dual_band" elements to "bands", to make things clearer. 
Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 18 ++++++++++---- include/uapi/linux/nl80211.h | 57 ++++++++++++++++++++------------------------ net/mac80211/cfg.c | 4 ++-- net/mac80211/trace.h | 16 ++++++------- net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 35 ++++++++++++++++++++------- net/wireless/trace.h | 16 ++++++------- 7 files changed, 86 insertions(+), 63 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a2c18b53e053..c92dc03c8528 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,7 +5,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015-2016 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2416,11 +2416,13 @@ struct cfg80211_qos_map { * This struct defines NAN configuration parameters * * @master_pref: master preference (1 - 255) - * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf + * @bands: operating bands, a bitmap of &enum nl80211_band values. + * For instance, for NL80211_BAND_2GHZ, bit 0 would be set + * (i.e. BIT(NL80211_BAND_2GHZ)). */ struct cfg80211_nan_conf { u8 master_pref; - u8 dual; + u8 bands; }; /** @@ -2428,11 +2430,11 @@ struct cfg80211_nan_conf { * configuration * * @CFG80211_NAN_CONF_CHANGED_PREF: master preference - * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation + * @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands */ enum cfg80211_nan_conf_changes { CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), - CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1), + CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1), }; /** @@ -3596,6 +3598,10 @@ struct wiphy_iftype_ext_capab { * attribute indices defined in &enum nl80211_bss_select_attr. 
* * @cookie_counter: unique generic cookie counter, used to identify objects. + * @nan_supported_bands: bands supported by the device in NAN mode, a + * bitmap of &enum nl80211_band values. For instance, for + * NL80211_BAND_2GHZ, bit 0 would be set + * (i.e. BIT(NL80211_BAND_2GHZ)). */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3727,6 +3733,8 @@ struct wiphy { u64 cookie_counter; + u8 nan_supported_bands; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cd547b864595..5ed257c4cd4e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -10,7 +10,7 @@ * Copyright 2008, 2009 Luis R. Rodriguez * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -854,12 +854,15 @@ * cfg80211_scan_done(). * * @NL80211_CMD_START_NAN: Start NAN operation, identified by its - * %NL80211_ATTR_WDEV interface. This interface must have been previously - * created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the - * NAN interface will create or join a cluster. This command must have a - * valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional - * %NL80211_ATTR_NAN_DUAL attributes. - * After this command NAN functions can be added. + * %NL80211_ATTR_WDEV interface. This interface must have been + * previously created with %NL80211_CMD_NEW_INTERFACE. After it + * has been started, the NAN interface will create or join a + * cluster. This command must have a valid + * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is + * omitted or set to 0, it means don't-care and the device will + * decide what to use. 
After this command NAN functions can be + * added. * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by * its %NL80211_ATTR_WDEV interface. * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined @@ -880,10 +883,14 @@ * This command is also used as a notification sent when a NAN function is * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID * and %NL80211_ATTR_COOKIE attributes. - * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN - * must be operational (%NL80211_CMD_START_NAN was executed). - * It must contain at least one of the following attributes: - * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN + * configuration. NAN must be operational (%NL80211_CMD_START_NAN + * was executed). It must contain at least one of the following + * attributes: %NL80211_ATTR_NAN_MASTER_PREF, + * %NL80211_ATTR_BANDS. If %NL80211_ATTR_BANDS is omitted, the + * current configuration is not changed. If it is present but + * set to zero, the configuration is changed to don't-care + * (i.e. the device can decide what to do). * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. @@ -1963,10 +1970,13 @@ enum nl80211_commands { * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. * Also, values 1 and 255 are reserved for certification purposes and * should not be used during a normal device operation. - * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see - * &enum nl80211_nan_dual_band_conf). This attribute is used with - * %NL80211_CMD_START_NAN and optionally with - * %NL80211_CMD_CHANGE_NAN_CONFIG. + * @NL80211_ATTR_BANDS: operating bands configuration. This is a u32 + * bitmask of BIT(NL80211_BAND_*) as described in %enum + * nl80211_band. For instance, for NL80211_BAND_2GHZ, bit 0 + * would be set. 
This attribute is used with + * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG, and + * it is optional. If no bands are set, it means don't-care and + * the device will decide what to use. * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See * &enum nl80211_nan_func_attributes for description of this nested * attribute. @@ -2397,7 +2407,7 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PEER_AID, NL80211_ATTR_NAN_MASTER_PREF, - NL80211_ATTR_NAN_DUAL, + NL80211_ATTR_BANDS, NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, @@ -5070,21 +5080,6 @@ enum nl80211_bss_select_attr { NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 }; -/** - * enum nl80211_nan_dual_band_conf - NAN dual band configuration - * - * Defines the NAN dual band mode of operation - * - * @NL80211_NAN_BAND_DEFAULT: device default mode - * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode - * @NL80211_NAN_BAND_5GHZ: 5GHz mode - */ -enum nl80211_nan_dual_band_conf { - NL80211_NAN_BAND_DEFAULT = 1 << 0, - NL80211_NAN_BAND_2GHZ = 1 << 1, - NL80211_NAN_BAND_5GHZ = 1 << 2, -}; - /** * enum nl80211_nan_function_type - NAN function type * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a0be2f6cd121..ac879bb17870 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -208,8 +208,8 @@ static int ieee80211_nan_change_conf(struct wiphy *wiphy, if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; - if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) - new_conf.dual = conf->dual; + if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) + new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index f78d9f4f8711..0d645bc148d0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1736,21 +1736,21 @@ TRACE_EVENT(drv_start_nan, LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) - __field(u8, dual) + __field(u8, bands) ), TP_fast_assign( 
LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT - ", master preference: %u, dual: %d", + ", master preference: %u, bands: 0x%0x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, - __entry->dual + __entry->bands ) ); @@ -1787,7 +1787,7 @@ TRACE_EVENT(drv_nan_change_conf, LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) - __field(u8, dual) + __field(u8, bands) __field(u32, changes) ), @@ -1795,15 +1795,15 @@ TRACE_EVENT(drv_nan_change_conf, LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT - ", master preference: %u, dual: %d, changes: 0x%x", + ", master preference: %u, bands: 0x%0x, changes: 0x%x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, - __entry->dual, __entry->changes + __entry->bands, __entry->changes ) ); diff --git a/net/wireless/core.c b/net/wireless/core.c index 903fc419217a..e55e05bc4805 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -626,7 +626,8 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && (!rdev->ops->start_nan || !rdev->ops->stop_nan || - !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func || + !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; #ifndef CONFIG_WIRELESS_WDS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9d738f75bd4e..b5f755b3ac5d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -398,7 +398,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, - [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_BANDS] = { .type = NLA_U32 }, 
[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, .len = FILS_MAX_KEK_LEN }, @@ -1886,6 +1886,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } } + if (nla_put_u32(msg, NL80211_ATTR_BANDS, + rdev->wiphy.nan_supported_bands)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -10777,15 +10781,22 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_NAN_DUAL]) - return -EINVAL; - conf.master_pref = nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); if (!conf.master_pref) return -EINVAL; - conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + if (info->attrs[NL80211_ATTR_BANDS]) { + u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); + + if (bands & ~(u32)wdev->wiphy->nan_supported_bands) + return -EOPNOTSUPP; + + if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) + return -EINVAL; + + conf.bands = bands; + } err = rdev_start_nan(rdev, wdev, &conf); if (err) @@ -11150,9 +11161,17 @@ static int nl80211_nan_change_config(struct sk_buff *skb, changed |= CFG80211_NAN_CONF_CHANGED_PREF; } - if (info->attrs[NL80211_ATTR_NAN_DUAL]) { - conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); - changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + if (info->attrs[NL80211_ATTR_BANDS]) { + u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); + + if (bands & ~(u32)wdev->wiphy->nan_supported_bands) + return -EOPNOTSUPP; + + if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) + return -EINVAL; + + conf.bands = bands; + changed |= CFG80211_NAN_CONF_CHANGED_BANDS; } if (!changed) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2419c390f150..776e80cef9b4 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1915,18 +1915,18 @@ TRACE_EVENT(rdev_start_nan, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, dual); + __field(u8, 
bands); ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT - ", master preference: %u, dual: %d", + ", master preference: %u, bands: 0x%0x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, - __entry->dual) + __entry->bands) ); TRACE_EVENT(rdev_nan_change_conf, @@ -1937,20 +1937,20 @@ TRACE_EVENT(rdev_nan_change_conf, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, dual); + __field(u8, bands); __field(u32, changes); ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT - ", master preference: %u, dual: %d, changes: %x", + ", master preference: %u, bands: 0x%0x, changes: %x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, - __entry->dual, __entry->changes) + __entry->bands, __entry->changes) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, -- cgit v1.2.3 From c56480a1e90261842f54f3a5a9ebc12d827f0c3e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:17 +0800 Subject: sctp: add support for generating stream reconf ssn/tsn reset request chunk This patch is to define SSN/TSN Reset Request Parameter described in rfc6525 section 4.3. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/linux/sctp.h | 5 +++++ include/net/sctp/sm.h | 2 ++ net/sctp/sm_make_chunk.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'include/net') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d74fca3f3141..71c0d41d9a59 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -737,4 +737,9 @@ struct sctp_strreset_inreq { __u16 list_of_streams[0]; }; +struct sctp_strreset_tsnreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 430ed139fbbb..ac37c1782e23 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -265,6 +265,8 @@ struct sctp_chunk *sctp_make_strreset_req( const struct sctp_association *asoc, __u16 stream_num, __u16 *stream_list, bool out, bool in); +struct sctp_chunk *sctp_make_strreset_tsnreq( + const struct sctp_association *asoc); void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index c7d3249f88ec..749842aead33 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3658,3 +3658,32 @@ struct sctp_chunk *sctp_make_strreset_req( return retval; } + +/* RE-CONFIG 4.3 (SSN/TSN RESET ALL) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 15 | Parameter Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_tsnreq( + const struct sctp_association *asoc) +{ + struct sctp_strreset_tsnreq tsnreq; + __u16 length = sizeof(tsnreq); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, length); + if (!retval) + return NULL; + + 
tsnreq.param_hdr.type = SCTP_PARAM_RESET_TSN_REQUEST; + tsnreq.param_hdr.length = htons(length); + tsnreq.request_seq = htonl(asoc->strreset_outseq); + + sctp_addto_chunk(retval, sizeof(tsnreq), &tsnreq); + + return retval; +} -- cgit v1.2.3 From a92ce1a42dde1caaee4afae67531e3e7acecf6e4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:18 +0800 Subject: sctp: implement sender-side procedures for SSN/TSN Reset Request Parameter This patch is to implement Sender-Side Procedures for the SSN/TSN Reset Request Parameter described in rfc6525 section 5.1.4. It is also to add sockopt SCTP_RESET_ASSOC in rfc6525 section 6.3.3 for users. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 29 +++++++++++++++++++++++++++++ net/sctp/stream.c | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 480b65a24aff..b60ca14068d8 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -198,6 +198,7 @@ int sctp_offload_init(void); */ int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params); +int sctp_send_reset_assoc(struct sctp_association *asoc); /* * Module global variables diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 03c27cefffb1..c0bd8c3d565a 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -117,6 +117,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_ASSOC_STATUS 115 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 +#define SCTP_RESET_ASSOC 120 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a8b4252fe084..45a7c417eb7f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3818,6 +3818,32 @@ out: return retval; } +static int 
sctp_setsockopt_reset_assoc(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + sctp_assoc_t associd; + int retval = -EINVAL; + + if (optlen != sizeof(associd)) + goto out; + + if (copy_from_user(&associd, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, associd); + if (!asoc) + goto out; + + retval = sctp_send_reset_assoc(asoc); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3990,6 +4016,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_RESET_STREAMS: retval = sctp_setsockopt_reset_streams(sk, optval, optlen); break; + case SCTP_RESET_ASSOC: + retval = sctp_setsockopt_reset_assoc(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6a686e330c57..53e49fc2f0a3 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -177,3 +177,43 @@ int sctp_send_reset_streams(struct sctp_association *asoc, out: return retval; } + +int sctp_send_reset_assoc(struct sctp_association *asoc) +{ + struct sctp_chunk *chunk = NULL; + int retval; + __u16 i; + + if (!asoc->peer.reconf_capable || + !(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) + return -ENOPROTOOPT; + + if (asoc->strreset_outstanding) + return -EINPROGRESS; + + chunk = sctp_make_strreset_tsnreq(asoc); + if (!chunk) + return -ENOMEM; + + /* Block further xmit of data until this request is completed */ + for (i = 0; i < asoc->stream->outcnt; i++) + asoc->stream->out[i].state = SCTP_STREAM_CLOSED; + + asoc->strreset_chunk = chunk; + sctp_chunk_hold(asoc->strreset_chunk); + + retval = sctp_send_reconf(asoc, chunk); + if (retval) { + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + + for (i = 0; i < asoc->stream->outcnt; i++) + asoc->stream->out[i].state = SCTP_STREAM_OPEN; + + return retval; + } + + 
asoc->strreset_outstanding = 1; + + return 0; +} -- cgit v1.2.3 From 78098117f8bfad4f2104c3f7b6b69071af95a246 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:19 +0800 Subject: sctp: add support for generating stream reconf add incoming/outgoing streams request chunk This patch is to define Add Incoming/Outgoing Streams Request Parameter described in rfc6525 section 4.5 and 4.6. They can be in one same chunk trunk as rfc6525 section 3.1-7 describes, so make them in one function. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/sctp.h | 7 +++++++ include/net/sctp/sm.h | 3 +++ net/sctp/sm_make_chunk.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'include/net') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 71c0d41d9a59..b055788de0cf 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -742,4 +742,11 @@ struct sctp_strreset_tsnreq { __u32 request_seq; }; +struct sctp_strreset_addstrm { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u16 number_of_streams; + __u16 reserved; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index ac37c1782e23..3675fde3a26e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -267,6 +267,9 @@ struct sctp_chunk *sctp_make_strreset_req( bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_strreset_addstrm( + const struct sctp_association *asoc, + __u16 out, __u16 in); void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 749842aead33..7f8dbf2c6cee 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3687,3 +3687,49 @@ struct sctp_chunk *sctp_make_strreset_tsnreq( return retval; } + +/* RE-CONFIG 4.5/4.6 (ADD 
STREAM) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 17 | Parameter Length = 12 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of new streams | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_addstrm( + const struct sctp_association *asoc, + __u16 out, __u16 in) +{ + struct sctp_strreset_addstrm addstrm; + __u16 size = sizeof(addstrm); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, (!!out + !!in) * size); + if (!retval) + return NULL; + + if (out) { + addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_OUT_STREAMS; + addstrm.param_hdr.length = htons(size); + addstrm.number_of_streams = htons(out); + addstrm.request_seq = htonl(asoc->strreset_outseq); + addstrm.reserved = 0; + + sctp_addto_chunk(retval, size, &addstrm); + } + + if (in) { + addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_IN_STREAMS; + addstrm.param_hdr.length = htons(size); + addstrm.number_of_streams = htons(in); + addstrm.request_seq = htonl(asoc->strreset_outseq + !!out); + addstrm.reserved = 0; + + sctp_addto_chunk(retval, size, &addstrm); + } + + return retval; +} -- cgit v1.2.3 From 242bd2d519d7194633e309286ba7ba29a1ad63e8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:20 +0800 Subject: sctp: implement sender-side procedures for Add Incoming/Outgoing Streams Request Parameter This patch is to implement Sender-Side Procedures for the Add Outgoing and Incoming Streams Request Parameter described in rfc6525 section 5.1.5-5.1.6. It is also to add sockopt SCTP_ADD_STREAMS in rfc6525 section 6.3.4 for users. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 2 ++ include/uapi/linux/sctp.h | 7 +++++ net/sctp/socket.c | 29 ++++++++++++++++++ net/sctp/stream.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b60ca14068d8..6dfc5536a3e6 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -199,6 +199,8 @@ int sctp_offload_init(void); int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params); int sctp_send_reset_assoc(struct sctp_association *asoc); +int sctp_send_add_streams(struct sctp_association *asoc, + struct sctp_add_streams *params); /* * Module global variables diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c0bd8c3d565a..a91a9cccbae6 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -118,6 +118,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 +#define SCTP_ADD_STREAMS 121 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1027,4 +1028,10 @@ struct sctp_reset_streams { uint16_t srs_stream_list[]; /* list if srs_num_streams is not 0 */ }; +struct sctp_add_streams { + sctp_assoc_t sas_assoc_id; + uint16_t sas_instrms; + uint16_t sas_outstrms; +}; + #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 45a7c417eb7f..75f35cea4371 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3844,6 +3844,32 @@ out: return retval; } +static int sctp_setsockopt_add_streams(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_add_streams params; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.sas_assoc_id); + if (!asoc) + goto out; + + retval = 
sctp_send_add_streams(asoc, &params); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4019,6 +4045,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_RESET_ASSOC: retval = sctp_setsockopt_reset_assoc(sk, optval, optlen); break; + case SCTP_ADD_STREAMS: + retval = sctp_setsockopt_add_streams(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 53e49fc2f0a3..eb02490245ba 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -217,3 +217,80 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) return 0; } + +int sctp_send_add_streams(struct sctp_association *asoc, + struct sctp_add_streams *params) +{ + struct sctp_stream *stream = asoc->stream; + struct sctp_chunk *chunk = NULL; + int retval = -ENOMEM; + __u32 outcnt, incnt; + __u16 out, in; + + if (!asoc->peer.reconf_capable || + !(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { + retval = -ENOPROTOOPT; + goto out; + } + + if (asoc->strreset_outstanding) { + retval = -EINPROGRESS; + goto out; + } + + out = params->sas_outstrms; + in = params->sas_instrms; + outcnt = stream->outcnt + out; + incnt = stream->incnt + in; + if (outcnt > SCTP_MAX_STREAM || incnt > SCTP_MAX_STREAM || + (!out && !in)) { + retval = -EINVAL; + goto out; + } + + if (out) { + struct sctp_stream_out *streamout; + + streamout = krealloc(stream->out, outcnt * sizeof(*streamout), + GFP_KERNEL); + if (!streamout) + goto out; + + memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); + stream->out = streamout; + } + + if (in) { + struct sctp_stream_in *streamin; + + streamin = krealloc(stream->in, incnt * sizeof(*streamin), + GFP_KERNEL); + if (!streamin) + goto out; + + memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); + stream->in = streamin; + } + + chunk = sctp_make_strreset_addstrm(asoc, out, in); + if (!chunk) + 
goto out; + + asoc->strreset_chunk = chunk; + sctp_chunk_hold(asoc->strreset_chunk); + + retval = sctp_send_reconf(asoc, chunk); + if (retval) { + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + goto out; + } + + stream->incnt = incnt; + stream->outcnt = outcnt; + + asoc->strreset_outstanding = !!out + !!in; + +out: + return retval; +} -- cgit v1.2.3 From 2f3a5272e5c16c3c10fbba06928a513f9b1e2fcd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Feb 2017 10:28:41 +0100 Subject: ipv4: fib: Add events for FIB replace and append The FIB notification chain currently uses the NLM_F_{REPLACE,APPEND} flags to signal routes being replaced or appended. Instead of using netlink flags for in-kernel notifications we can simply introduce two new events in the FIB notification chain. This has the added advantage of making the API cleaner, thereby making it clear that these events should be supported by listeners of the notification chain. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko CC: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_trie.c | 27 ++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 45a184eaff2b..368bb4024b78 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -211,7 +211,6 @@ struct fib_entry_notifier_info { u8 tos; u8 type; u32 tb_id; - u32 nlflags; }; struct fib_nh_notifier_info { @@ -220,6 +219,8 @@ struct fib_nh_notifier_info { }; enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, FIB_EVENT_ENTRY_ADD, FIB_EVENT_ENTRY_DEL, FIB_EVENT_RULE_ADD, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1c4d42e46dbb..d8cea210af0e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -124,7 +124,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb, static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -133,7 +133,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifier(nb, net, event_type, &info.info); } @@ -197,7 +196,7 @@ int call_fib_notifiers(struct net *net, enum fib_event_type event_type, static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -206,7 +205,6 @@ static int call_fib_entry_notifiers(struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifiers(net, event_type, &info.info); } @@ -1198,6 +1196,7 @@ static int fib_insert_alias(struct trie *t, 
struct key_vector *tp, int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_config *cfg) { + enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; @@ -1295,10 +1294,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, key, plen, fi, new_fa->fa_tos, cfg->fc_type, - tb->tb_id, nlflags); + tb->tb_id); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1319,10 +1318,12 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (fa_match) goto out; - if (cfg->fc_nlflags & NLM_F_APPEND) + if (cfg->fc_nlflags & NLM_F_APPEND) { + event = FIB_EVENT_ENTRY_APPEND; nlflags |= NLM_F_APPEND; - else + } else { fa = fa_first; + } } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) @@ -1351,8 +1352,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, - cfg->fc_type, tb->tb_id, cfg->fc_nlflags); + call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, + tb->tb_id); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1654,7 +1655,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, fa_to_delete->fa_info, tos, - fa_to_delete->fa_type, tb->tb_id, 0); + fa_to_delete->fa_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1973,7 +1974,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, - tb->tb_id, 0); + tb->tb_id); 
hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2013,7 +2014,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, call_fib_entry_notifier(nb, net, event_type, l->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, - fa->fa_type, fa->tb_id, 0); + fa->fa_type, fa->tb_id); } } -- cgit v1.2.3 From 79112c26f14c38ddbac3b2739469e373ef424fe6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Feb 2017 14:38:55 +0100 Subject: sched: rename tcf_destroy to tcf_destroy_proto This function destroys TC filter protocol, not TC filter. So name it accordingly. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- net/sched/cls_api.c | 8 ++++---- net/sched/sch_api.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e2f426f6d62f..453350650b9a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -405,7 +405,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); -bool tcf_destroy(struct tcf_proto *tp, bool force); +bool tcf_proto_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1ecdf809b5fa..90536ebae02a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -323,7 +323,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); err = 0; goto errout; } @@ -338,7 +338,7 @@ replay: err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); goto errout; } break; @@ -350,7 +350,7 @@ replay: tfilter_notify(net, skb, 
n, tp, t->tcm_handle, RTM_DELTFILTER, false); - if (tcf_destroy(tp, false)) + if (tcf_proto_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; @@ -374,7 +374,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); } errout: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ef53ede11590..f30b517f2282 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1900,7 +1900,7 @@ reset: } EXPORT_SYMBOL(tc_classify); -bool tcf_destroy(struct tcf_proto *tp, bool force) +bool tcf_proto_destroy(struct tcf_proto *tp, bool force) { if (tp->ops->destroy(tp, force)) { module_put(tp->ops->owner); @@ -1917,7 +1917,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl) while ((tp = rtnl_dereference(*fl)) != NULL) { RCU_INIT_POINTER(*fl, tp->next); - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); } } EXPORT_SYMBOL(tcf_destroy_chain); -- cgit v1.2.3 From cf1facda2f61bc3e9ffd985b6d624dec6ad3f279 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Feb 2017 14:38:56 +0100 Subject: sched: move tcf_proto_destroy and tcf_destroy_chain helpers into cls_api Creation is done in this file, move destruction to be at the same place. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 2 ++ include/net/sch_generic.h | 2 -- net/sched/cls_api.c | 21 +++++++++++++++++++++ net/sched/sch_api.c | 22 ---------------------- net/sched/sch_atm.c | 1 + net/sched/sch_cbq.c | 1 + net/sched/sch_choke.c | 1 + net/sched/sch_dsmark.c | 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_ingress.c | 1 + net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_sfb.c | 1 + net/sched/sch_sfq.c | 1 + 15 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dabb00af46a0..71b266cd63d4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,6 +17,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +void tcf_destroy_chain(struct tcf_proto __rcu **fl); + static inline unsigned long __cls_set_class(unsigned long *clp, unsigned long cl) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 453350650b9a..aeec4086afb2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -405,8 +405,6 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); -bool tcf_proto_destroy(struct tcf_proto *tp, bool force); -void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); static inline void skb_reset_tc(struct sk_buff *skb) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 90536ebae02a..4efa4df8322f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -127,6 +127,27 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return first; } +static bool tcf_proto_destroy(struct tcf_proto *tp, bool force) +{ + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return 
true; + } + return false; +} + +void tcf_destroy_chain(struct tcf_proto __rcu **fl) +{ + struct tcf_proto *tp; + + while ((tp = rtnl_dereference(*fl)) != NULL) { + RCU_INIT_POINTER(*fl, tp->next); + tcf_proto_destroy(tp, true); + } +} +EXPORT_SYMBOL(tcf_destroy_chain); + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f30b517f2282..adeabaec0d0b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1900,28 +1900,6 @@ reset: } EXPORT_SYMBOL(tc_classify); -bool tcf_proto_destroy(struct tcf_proto *tp, bool force) -{ - if (tp->ops->destroy(tp, force)) { - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); - return true; - } - - return false; -} - -void tcf_destroy_chain(struct tcf_proto __rcu **fl) -{ - struct tcf_proto *tp; - - while ((tp = rtnl_dereference(*fl)) != NULL) { - RCU_INIT_POINTER(*fl, tp->next); - tcf_proto_destroy(tp, true); - } -} -EXPORT_SYMBOL(tcf_destroy_chain); - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 481e4f12aeb4..2209c2ddacbf 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -15,6 +15,7 @@ #include /* for fput */ #include #include +#include /* * The ATM queuing discipline provides a framework for invoking classifiers diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f1207582cbf3..d6ca18dc04c3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Class-Based Queueing (CBQ) algorithm. 
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b6d5bd69101..3b86a97bc67c 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1308bbf460f7..802ac7c2e5e8 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 2f50e4c72fb4..9f3a884d1590 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 760f39e7caee..4cd5fb134bc9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -40,6 +40,7 @@ #include #include #include +#include /* HTB algorithm. Author: devik@cdi.cz diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 8fe6999b642a..3bab5f66c392 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -16,6 +16,7 @@ #include #include +#include static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 9ffbb025b37e..e7839a0d0eaa 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -25,7 +25,7 @@ #include #include #include - +#include struct multiq_sched_data { u16 bands; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8f575899adfa..d4d7db267b6e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -20,7 +20,7 @@ #include #include #include - +#include struct prio_sched_data { int bands; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 20a350bd1b1d..fe6963d21519 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* diff --git a/net/sched/sch_sfq.c 
b/net/sched/sch_sfq.c index 7f195ed4d568..83d06e251b93 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -23,6 +23,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From 147c1e9b902c25c868024260d24bb0b1dac1433d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Thu, 9 Feb 2017 14:54:40 +0100 Subject: switchdev: bridge: Offload multicast disabled Offload multicast disabled flag, for more accurate mc flood behavior: When it is on, the mdb should be ignored. When it is off, unregistered mc packets should be flooded to mc router ports. Signed-off-by: Nogah Frankel Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Acked-by: Ivan Vecera Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 ++ net/bridge/br_multicast.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index eba80c4fc56f..2971c2a2cdf2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -48,6 +48,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, }; struct switchdev_attr { @@ -62,6 +63,7 @@ struct switchdev_attr { unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ + bool mc_disabled; /* MC_DISABLED */ } u; }; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1de3438e36bf..8c0e896936ff 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -27,6 +27,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #include @@ -1007,6 +1008,18 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) } #endif +static void br_mc_disabled_update(struct net_device *dev, bool value) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = 
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + .flags = SWITCHDEV_F_DEFER, + .u.mc_disabled = value, + }; + + switchdev_port_attr_set(dev, &attr); +} + int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; @@ -1019,6 +1032,8 @@ int br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, (unsigned long)port); #endif + br_mc_disabled_update(port->dev, port->br->multicast_disabled); + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!port->mcast_stats) return -ENOMEM; @@ -2121,6 +2136,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled == !val) goto unlock; + br_mc_disabled_update(br->dev, !val); br->multicast_disabled = !val; if (br->multicast_disabled) goto unlock; -- cgit v1.2.3 From 6d5496483f5eb7b4da2e83c7b2149a21ad412d96 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Thu, 9 Feb 2017 14:54:42 +0100 Subject: switchdev: bridge: Offload mc router ports Offload the mc router ports list, whenever it is being changed. It is done because in some cases mc packets needs to be flooded to all the ports in this list. Signed-off-by: Nogah Frankel Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Acked-by: Ivan Vecera Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 2 ++ net/bridge/br_multicast.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 2971c2a2cdf2..929d6af321cd 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, @@ -61,6 +62,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ bool mc_disabled; /* MC_DISABLED */ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2add6d417aa4..b760f2620abf 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1317,6 +1317,19 @@ br_multicast_update_query_timer(struct net_bridge *br, mod_timer(&query->timer, jiffies + br->multicast_querier_interval); } +static void br_port_mc_router_state_change(struct net_bridge_port *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1342,6 +1355,7 @@ static void br_multicast_add_router(struct net_bridge *br, else hlist_add_head_rcu(&port->rlist, &br->router_list); br_rtr_notify(br->dev, port, RTM_NEWMDB); + br_port_mc_router_state_change(port, true); } static void br_multicast_mark_router(struct net_bridge *br, @@ -2049,6 +2063,7 @@ static void 
__del_port_router(struct net_bridge_port *p) return; hlist_del_init_rcu(&p->rlist); br_rtr_notify(p->br->dev, p, RTM_DELMDB); + br_port_mc_router_state_change(p, false); /* don't allow timer refresh */ if (p->multicast_router == MDB_RTR_TYPE_TEMP) -- cgit v1.2.3 From 71d0ed7079dffbc5cd0941d77d9b84e04109c9bb Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 7 Feb 2017 09:56:07 +0200 Subject: net/act_pedit: Support using offset relative to the conventional network headers Extend pedit to enable the user setting offset relative to network headers. This change would enable to work with more complex header schemes (vs the simple IPv4 case) where setting a fixed offset relative to the network header is not enough. After this patch, the action has information about the exact header type and field inside this header. This information could be used later on for hardware offloading of pedit. Backward compatibility was being kept: 1. Old kernel <-> new userspace 2. New kernel <-> old userspace 3. add rule using new userspace <-> dump using old userspace 4. add rule using old userspace <-> dump using new userspace When using the extended api, new netlink attributes are being used. This way, operation will fail in (1) and (3) - and no malformed rule be added or dumped. Of course, new user space that doesn't need the new functionality can use the old netlink attributes and operation will succeed. Since action can support both api's, (2) should work, and it is easy to write the new user space to have (4) work. The action is having a strict check that only header types and commands it can handle are accepted. This way future additions will be much easier. Usage example: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto tcp \ dst_port 80 \ action pedit munge tcp dport set 8080 pipe \ action mirred egress redirect dev veth0 Will forward tcp port whose original dest port is 80, while modifying the destination port to 8080. 
Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: David S. Miller --- include/net/tc_act/tc_pedit.h | 5 + include/uapi/linux/tc_act/tc_pedit.h | 23 ++++ net/sched/act_pedit.c | 196 ++++++++++++++++++++++++++++++++--- 3 files changed, 208 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 29e38d6823df..e076f22035a5 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -3,11 +3,16 @@ #include +struct tcf_pedit_key_ex { + enum pedit_header_type htype; +}; + struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; + struct tcf_pedit_key_ex *tcfp_keys_ex; }; #define to_pedit(a) ((struct tcf_pedit *)a) diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 6389959a5157..22f19eeda997 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -11,10 +11,33 @@ enum { TCA_PEDIT_TM, TCA_PEDIT_PARMS, TCA_PEDIT_PAD, + TCA_PEDIT_PARMS_EX, + TCA_PEDIT_KEYS_EX, + TCA_PEDIT_KEY_EX, __TCA_PEDIT_MAX }; #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1) +enum { + TCA_PEDIT_KEY_EX_HTYPE = 1, + __TCA_PEDIT_KEY_EX_MAX +}; +#define TCA_PEDIT_KEY_EX_MAX (__TCA_PEDIT_KEY_EX_MAX - 1) + + /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is a special case for legacy users. 
It + * means no specific header type - offset is relative to the network layer + */ +enum pedit_header_type { + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0, + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1, + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2, + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3, + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4, + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5, + __PEDIT_HDR_TYPE_MAX, +}; +#define TCA_PEDIT_HDR_TYPE_MAX (__PEDIT_HDR_TYPE_MAX - 1) + struct tc_pedit_key { __u32 mask; /* AND */ __u32 val; /*XOR */ diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b27c4daec88f..fdd012bd3602 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -22,6 +22,7 @@ #include #include #include +#include #define PEDIT_TAB_MASK 15 @@ -30,18 +31,112 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; +static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = { + [TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 }, +}; + +static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, + u8 n) +{ + struct tcf_pedit_key_ex *keys_ex; + struct tcf_pedit_key_ex *k; + const struct nlattr *ka; + int err = -EINVAL; + int rem; + + if (!nla || !n) + return NULL; + + keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); + if (!keys_ex) + return ERR_PTR(-ENOMEM); + + k = keys_ex; + + nla_for_each_nested(ka, nla, rem) { + struct nlattr *tb[TCA_PEDIT_KEY_EX_MAX + 1]; + + if (!n) { + err = -EINVAL; + goto err_out; + } + n--; + + if (nla_type(ka) != TCA_PEDIT_KEY_EX) { + err = -EINVAL; + goto err_out; + } + + err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, + pedit_key_ex_policy); + if (err) + goto err_out; + + if (!tb[TCA_PEDIT_KEY_EX_HTYPE]) { + err = -EINVAL; + goto err_out; + } + + k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]); + + if (k->htype > TCA_PEDIT_HDR_TYPE_MAX) { + err = -EINVAL; + goto 
err_out; + } + + k++; + } + + if (n) + goto err_out; + + return keys_ex; + +err_out: + kfree(keys_ex); + return ERR_PTR(err); +} + +static int tcf_pedit_key_ex_dump(struct sk_buff *skb, + struct tcf_pedit_key_ex *keys_ex, int n) +{ + struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + + for (; n > 0; n--) { + struct nlattr *key_start; + + key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + + if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype)) { + nlmsg_trim(skb, keys_start); + return -EINVAL; + } + + nla_nest_end(skb, key_start); + + keys_ex++; + } + + nla_nest_end(skb, keys_start); + + return 0; +} + static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct nlattr *pattr; struct tc_pedit *parm; int ret = 0, err; struct tcf_pedit *p; struct tc_pedit_key *keys = NULL; + struct tcf_pedit_key_ex *keys_ex; int ksize; if (nla == NULL) @@ -51,13 +146,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_PEDIT_PARMS] == NULL) + pattr = tb[TCA_PEDIT_PARMS]; + if (!pattr) + pattr = tb[TCA_PEDIT_PARMS_EX]; + if (!pattr) return -EINVAL; - parm = nla_data(tb[TCA_PEDIT_PARMS]); + + parm = nla_data(pattr); ksize = parm->nkeys * sizeof(struct tc_pedit_key); - if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) + if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; + keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys); + if (IS_ERR(keys_ex)) + return PTR_ERR(keys_ex); + if (!tcf_hash_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; @@ -69,6 +172,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { tcf_hash_cleanup(*a, est); + kfree(keys_ex); return -ENOMEM; } ret = ACT_P_CREATED; @@ -81,8 +185,10 @@ static int 
tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); - if (keys == NULL) + if (!keys) { + kfree(keys_ex); return -ENOMEM; + } } } @@ -95,6 +201,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + + kfree(p->tcfp_keys_ex); + p->tcfp_keys_ex = keys_ex; + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, *a); @@ -106,6 +216,7 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) struct tcf_pedit *p = to_pedit(a); struct tc_pedit_key *keys = p->tcfp_keys; kfree(keys); + kfree(p->tcfp_keys_ex); } static bool offset_valid(struct sk_buff *skb, int offset) @@ -119,38 +230,84 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } +static int pedit_skb_hdr_offset(struct sk_buff *skb, + enum pedit_header_type htype, int *hoffset) +{ + int ret = -EINVAL; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + if (skb_mac_header_was_set(skb)) { + *hoffset = skb_mac_offset(skb); + ret = 0; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + *hoffset = skb_network_offset(skb); + ret = 0; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + if (skb_transport_header_was_set(skb)) { + *hoffset = skb_transport_offset(skb); + ret = 0; + } + break; + default: + ret = -EINVAL; + break; + }; + + return ret; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; - unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; - off = skb_network_offset(skb); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; + struct tcf_pedit_key_ex *tkey_ex = 
p->tcfp_keys_ex; + enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, _data; int offset = tkey->off; + int hoffset; + int rc; + + if (tkey_ex) { + htype = tkey_ex->htype; + tkey_ex++; + } + + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info("tc filter pedit bad header type specified (0x%x)\n", + htype); + goto bad; + } if (tkey->offmask) { char *d, _d; - if (!offset_valid(skb, off + tkey->at)) { + if (!offset_valid(skb, hoffset + tkey->at)) { pr_info("tc filter pedit 'at' offset %d out of bounds\n", - off + tkey->at); + hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, off + tkey->at, 1, + d = skb_header_pointer(skb, hoffset + tkey->at, 1, &_d); if (!d) goto bad; @@ -163,19 +320,19 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, goto bad; } - if (!offset_valid(skb, off + offset)) { + if (!offset_valid(skb, hoffset + offset)) { pr_info("tc filter pedit offset %d out of bounds\n", - offset); + hoffset + offset); goto bad; } - ptr = skb_header_pointer(skb, off + offset, 4, &_data); + ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data); if (!ptr) goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); if (ptr == &_data) - skb_store_bits(skb, off + offset, ptr, 4); + skb_store_bits(skb, hoffset + offset, ptr, 4); } goto done; @@ -215,8 +372,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->refcnt = p->tcf_refcnt - ref; opt->bindcnt = p->tcf_bindcnt - bind; - if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) - goto nla_put_failure; + if (p->tcfp_keys_ex) { + tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); + + if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt)) + goto nla_put_failure; + } else { + if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) + goto nla_put_failure; + } tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) -- cgit v1.2.3 From 
853a14ba4682f820266469979c9297debc05f60c Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 7 Feb 2017 09:56:08 +0200 Subject: net/act_pedit: Introduce 'add' operation This command could be useful to inc/dec fields. For example, to forward any TCP packet and decrease its TTL: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower ip_proto tcp \ action pedit munge ip ttl add 0xff pipe \ action mirred egress redirect dev veth0 In the example above, adding 0xff to this u8 field is actually decreasing it by one, since the operation is masked. Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: David S. Miller --- include/net/tc_act/tc_pedit.h | 1 + include/uapi/linux/tc_act/tc_pedit.h | 8 ++++++++ net/sched/act_pedit.c | 30 ++++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index e076f22035a5..dfbd6ee0bc7c 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -5,6 +5,7 @@ struct tcf_pedit_key_ex { enum pedit_header_type htype; + enum pedit_cmd cmd; }; struct tcf_pedit { diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 22f19eeda997..143d2b31a316 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -20,6 +20,7 @@ enum { enum { TCA_PEDIT_KEY_EX_HTYPE = 1, + TCA_PEDIT_KEY_EX_CMD = 2, __TCA_PEDIT_KEY_EX_MAX }; #define TCA_PEDIT_KEY_EX_MAX (__TCA_PEDIT_KEY_EX_MAX - 1) @@ -38,6 +39,13 @@ enum pedit_header_type { }; #define TCA_PEDIT_HDR_TYPE_MAX (__PEDIT_HDR_TYPE_MAX - 1) +enum pedit_cmd { + TCA_PEDIT_KEY_EX_CMD_SET = 0, + TCA_PEDIT_KEY_EX_CMD_ADD = 1, + __PEDIT_CMD_MAX, +}; +#define TCA_PEDIT_CMD_MAX (__PEDIT_CMD_MAX - 1) + struct tc_pedit_key { __u32 mask; /* AND */ __u32 val; /*XOR */ diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdd012bd3602..c1310472f620 100644 --- 
a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -36,6 +36,7 @@ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = { [TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 }, + [TCA_PEDIT_KEY_EX_CMD] = { .type = NLA_U16 }, }; static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, @@ -75,14 +76,17 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, if (err) goto err_out; - if (!tb[TCA_PEDIT_KEY_EX_HTYPE]) { + if (!tb[TCA_PEDIT_KEY_EX_HTYPE] || + !tb[TCA_PEDIT_KEY_EX_CMD]) { err = -EINVAL; goto err_out; } k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]); + k->cmd = nla_get_u16(tb[TCA_PEDIT_KEY_EX_CMD]); - if (k->htype > TCA_PEDIT_HDR_TYPE_MAX) { + if (k->htype > TCA_PEDIT_HDR_TYPE_MAX || + k->cmd > TCA_PEDIT_CMD_MAX) { err = -EINVAL; goto err_out; } @@ -110,7 +114,8 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); - if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype)) { + if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) || + nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) { nlmsg_trim(skb, keys_start); return -EINVAL; } @@ -280,15 +285,19 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex; enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, _data; int offset = tkey->off; int hoffset; + u32 val; int rc; if (tkey_ex) { htype = tkey_ex->htype; + cmd = tkey_ex->cmd; + tkey_ex++; } @@ -330,7 +339,20 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (!ptr) goto bad; /* just do it, baby */ - *ptr = ((*ptr & tkey->mask) ^ tkey->val); + switch (cmd) { + case TCA_PEDIT_KEY_EX_CMD_SET: + val = 
tkey->val; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + val = (*ptr + tkey->val) & ~tkey->mask; + break; + default: + pr_info("tc filter pedit bad command (%d)\n", + cmd); + goto bad; + } + + *ptr = ((*ptr & tkey->mask) ^ val); if (ptr == &_data) skb_store_bits(skb, hoffset + offset, ptr, 4); } -- cgit v1.2.3 From 4d56a29f17508b2eb8bee66b8f0e3679201fa807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 7 Feb 2017 15:03:05 -0800 Subject: net: dsa: remove unnecessary phy*.h includes Including phy.h and phy_fixed.h into net/dsa.h causes phy*.h to be an unnecessary dependency for quite a large amount of the kernel. There's very little which actually requires definitions from phy.h in net/dsa.h - the include itself only wants the declaration of a couple of structures and IFNAMSIZ. Add linux/if.h for IFNAMSIZ, declarations for the structures, phy.h to mv88e6xxx.h as it needs it for phy_interface_t, and remove both phy.h and phy_fixed.h from net/dsa.h. This patch reduces from around 800 files rebuilt to around 40 - even with ccache, the time difference is noticeable. Tested-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S.
Miller --- drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 + include/net/dsa.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index d6b335cd8c09..ac54f40813f7 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -15,6 +15,7 @@ #include #include #include +#include <linux/phy.h> #ifndef UINT64_MAX #define UINT64_MAX (u64)(~((u64)0)) diff --git a/include/net/dsa.h b/include/net/dsa.h index b49b2004891e..4e13e695f025 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -11,17 +11,18 @@ #ifndef __LINUX_NET_DSA_H #define __LINUX_NET_DSA_H +#include <linux/if.h> #include #include #include #include #include #include -#include <linux/phy.h> -#include <linux/phy_fixed.h> #include struct tc_action; +struct phy_device; +struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, -- cgit v1.2.3 From c16ec18599c8c1722d476011786fd9e2529888f7 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 11 Feb 2017 13:49:20 +0200 Subject: net: rename dst_neigh_output back to neigh_output After the dst->pending_confirm flag was removed, we do not need anymore to provide dst arg to dst_neigh_output. So, rename it to neigh_output as before commit 5110effee8fd ("net: Do delayed neigh confirmation."). Signed-off-by: Julian Anastasov Signed-off-by: David S.
Miller --- drivers/net/vrf.c | 4 ++-- include/net/dst.h | 12 ------------ include/net/neighbour.h | 10 ++++++++++ net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 5 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 630eafdb79e8..22379da63400 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -379,7 +379,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = dst_neigh_output(dst, neigh, skb); + ret = neigh_output(neigh, skb); rcu_read_unlock_bh(); return ret; } @@ -577,7 +577,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = dst_neigh_output(dst, neigh, skb); + ret = neigh_output(neigh, skb); } rcu_read_unlock_bh(); diff --git a/include/net/dst.h b/include/net/dst.h index 84a1043dd6a1..049af33da3b6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -442,18 +442,6 @@ static inline void dst_confirm(struct dst_entry *dst) { } -static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, - struct sk_buff *skb) -{ - const struct hh_cache *hh; - - hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) - return neigh_hh_output(hh, skb); - else - return n->output(n, skb); -} - static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 8b683841e574..5ebf69491160 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -468,6 +468,16 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb return dev_queue_xmit(skb); } +static inline int 
neigh_output(struct neighbour *n, struct sk_buff *skb) +{ + const struct hh_cache *hh = &n->hh; + + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); +} + static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7a719f1ae556..737ce826d7ec 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -225,7 +225,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s int res; sock_confirm_neigh(skb, neigh); - res = dst_neigh_output(dst, neigh, skb); + res = neigh_output(neigh, skb); rcu_read_unlock_bh(); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d299040613a0..a75871c62328 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = dst_neigh_output(dst, neigh, skb); + ret = neigh_output(neigh, skb); rcu_read_unlock_bh(); return ret; } -- cgit v1.2.3 From 1a94e38d254b3622d5d53f74b3b716b0fcab0ba8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Feb 2017 12:08:23 +0100 Subject: netfilter: nf_tables: add NFTA_RULE_ID attribute This new attribute allows us to uniquely identify a rule in transaction. Robots may trigger an insertion followed by deletion in a batch, in that scenario we still don't have a public rule handle that we can use to delete the rule. This is similar to the NFTA_SET_ID attribute that allows us to refer to an anonymous set from a batch. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 21ce50e6d0c5..ac84686aaafb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1202,10 +1202,13 @@ struct nft_trans { struct nft_trans_rule { struct nft_rule *rule; + u32 rule_id; }; #define nft_trans_rule(trans) \ (((struct nft_trans_rule *)trans->data)->rule) +#define nft_trans_rule_id(trans) \ + (((struct nft_trans_rule *)trans->data)->rule_id) struct nft_trans_set { struct nft_set *set; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 207951516ede..05215d30fe5c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -207,6 +207,7 @@ enum nft_chain_attributes { * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) + * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -218,6 +219,7 @@ enum nft_rule_attributes { NFTA_RULE_POSITION, NFTA_RULE_USERDATA, NFTA_RULE_PAD, + NFTA_RULE_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 71c60a04b66b..6c782532615f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -240,6 +240,10 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, if (trans == NULL) return NULL; + if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) { + 
nft_trans_rule_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); + } nft_trans_rule(trans) = rule; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -2293,6 +2297,22 @@ err1: return err; } +static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; + + list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_rule *rule = nft_trans_rule(trans); + + if (trans->msg_type == NFT_MSG_NEWRULE && + id == nft_trans_rule_id(trans)) + return rule; + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_delrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2330,6 +2350,12 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, if (IS_ERR(rule)) return PTR_ERR(rule); + err = nft_delrule(&ctx, rule); + } else if (nla[NFTA_RULE_ID]) { + rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + err = nft_delrule(&ctx, rule); } else { err = nft_delrule_by_chain(&ctx); -- cgit v1.2.3 From 37fabbf4d489cc2e1cbf7cde816d9453a65ddfb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2017 05:46:46 -0800 Subject: net: busy-poll: remove LL_FLUSH_FAILED and LL_FLUSH_BUSY Commit 79e7fff47b7b ("net: remove support for per driver ndo_busy_poll()") made them obsolete. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/busy_poll.h | 4 ---- net/core/dev.c | 3 --- 2 files changed, 7 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index d73b849e29a6..b8d637225a07 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,10 +33,6 @@ struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; -/* return values from ndo_ll_poll */ -#define LL_FLUSH_FAILED -1 -#define LL_FLUSH_BUSY -2 - static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; diff --git a/net/core/dev.c b/net/core/dev.c index 363c44b9be63..2f1bbe1bf67c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5008,9 +5008,6 @@ count: LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || busy_loop_timeout(end_time)) break; -- cgit v1.2.3 From 8ae70032552a8082734d0b8550848cf6bf92e1d5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 15 Feb 2017 11:57:50 +0100 Subject: sched: have stub for tcf_destroy_chain in case NET_CLS is not configured This fixes broken build for !NET_CLS: net/built-in.o: In function `fq_codel_destroy': /home/sab/linux/net-next/net/sched/sch_fq_codel.c:468: undefined reference to `tcf_destroy_chain' Fixes: cf1facda2f61 ("sched: move tcf_proto_destroy and tcf_destroy_chain helpers into cls_api") Reported-by: Sabrina Dubroca Signed-off-by: Jiri Pirko Tested-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 71b266cd63d4..be5c12a5c375 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,7 +17,13 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +#ifdef CONFIG_NET_CLS void tcf_destroy_chain(struct tcf_proto __rcu **fl); +#else +static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl) +{ +} +#endif static inline unsigned long __cls_set_class(unsigned long *clp, unsigned long cl) -- cgit v1.2.3