From a9efad8b24bd22616f6c749a6c029957dc76542b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 May 2016 14:24:56 -0700 Subject: net_sched: avoid too many hrtimer_start() calls I found a serious performance bug in packet schedulers using hrtimers. sch_htb and sch_fq are definitely impacted by this problem. We constantly rearm high resolution timers if some packets are throttled in one (or more) class, and other packets are flying through qdisc on another (non-throttled) class. hrtimer_start() does not have the mod_timer() trick of doing nothing if the expires value does not change: if (timer_pending(timer) && timer->expires == expires) return 1; This issue is particularly visible when multiple cpus can queue/dequeue packets on the same qdisc, as hrtimer code has to lock a remote base. I used the following fix: 1) Change htb to use qdisc_watchdog_schedule_ns() instead of open-coding it. 2) Cache watchdog prior expiration. hrtimer might provide this, but I prefer to not rely on some hrtimer internal. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 401038d2f9b8..fea53f4d92ca 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,6 +61,7 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { + u64 last_expires; struct hrtimer timer; struct Qdisc *qdisc; }; -- cgit v1.2.3 From 3d3ed18151172c845a11b7c184f2120220ae19fc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 23 May 2016 21:07:20 -0400 Subject: net sched actions: policer missing timestamp processing Policer was not dumping or updating timestamps Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S.
Miller --- include/uapi/linux/pkt_cls.h | 4 +++- net/sched/act_police.c | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index eba5914ba5d1..f4297c8a42fe 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -145,6 +145,8 @@ enum { TCA_POLICE_PEAKRATE, TCA_POLICE_AVRATE, TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, __TCA_POLICE_MAX #define TCA_POLICE_RESULT TCA_POLICE_RESULT }; @@ -173,7 +175,7 @@ enum { TCA_U32_DIVISOR, TCA_U32_SEL, TCA_U32_POLICE, - TCA_U32_ACT, + TCA_U32_ACT, TCA_U32_INDEV, TCA_U32_PCNT, TCA_U32_MARK, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 330f14e302e8..b884dae692a1 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -239,6 +239,8 @@ override: police->tcfp_t_c = ktime_get_ns(); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(tn); + police->tcf_tm.install = jiffies; + police->tcf_tm.lastuse = jiffies; h = tcf_hash(police->tcf_index, POL_TAB_MASK); spin_lock_bh(&hinfo->lock); hlist_add_head(&police->tcf_head, &hinfo->htab[h]); @@ -268,6 +270,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, spin_lock(&police->tcf_lock); bstats_update(&police->tcf_bstats, skb); + tcf_lastuse_update(&police->tcf_tm); if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { @@ -327,6 +330,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; + struct tcf_t t; if (police->rate_present) psched_ratecfg_getrate(&opt.rate, &police->rate); @@ -340,6 +344,13 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_ewma_rate && nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate)) goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - 
police->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(police->tcf_tm.expires); + if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD)) + goto nla_put_failure; + return skb->len; nla_put_failure: -- cgit v1.2.3 From 3851112e4737cd52aaeda0ce8d084be9ee128106 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Ravipati Date: Sun, 22 May 2016 23:59:00 -0700 Subject: ethtool: add support for 25G/50G/100G speed modes This patch enhances ethtool link mode bitmap to include 25G/50G/100G speed along with interface modes Signed-off-by: Vidya Sagar Ravipati Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9222db8ccccc..5f030b46cff4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1353,6 +1353,15 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. 
Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1361,7 +1370,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, + = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit v1.2.3 From 9791d8e7627d1c4dbf8819646833f2f576b4f8f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 25 May 2016 16:50:45 +0200 Subject: ipv6: hide ip6_encap_hlen/ip6_tnl_encap definitions A recent cleanup moved MAX_IPTUN_ENCAP_OPS along with some other definitions, but it is now invisible when CONFIG_INET is not defined, but still referenced from ip6_tunnel.h: In file included from net/xfrm/xfrm_input.c:17:0: include/net/ip6_tunnel.h:67:17: error: 'MAX_IPTUN_ENCAP_OPS' undeclared here (not in a function) ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; ^~~~~~~~~~~~~~~~~~~ This hides the ip6_encap_hlen and ip6_tnl_encap functions inside of CONFIG_INET so we don't run into the problem. Alternatively we could move the macro out of the #ifdef again to restore the previous behavior. Signed-off-by: Arnd Bergmann Fixes: 55c2bc143224 ("net: Cleanup encap items in ip_tunnels.h") Signed-off-by: David S.
Miller --- include/net/ip6_tunnel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d325c81332e3..43a5a0e4524c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -63,6 +63,8 @@ struct ip6_tnl_encap_ops { u8 *protocol, struct flowi6 *fl6); }; +#ifdef CONFIG_INET + extern const struct ip6_tnl_encap_ops __rcu * ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; @@ -138,7 +140,6 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); -#ifdef CONFIG_INET static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { -- cgit v1.2.3 From 40eb90e9ccc3f96f937ea1db79d0f9cb61553ed5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 29 May 2016 17:42:13 +0800 Subject: sctp: sctp_diag should dump sctp socket type Currently we cannot distinguish whether an sk is UDP-style or SCTP-style when we use ss to dump sctp_info, so it is necessary to dump the socket type as well. For sctp_diag, ss support is not officially available, thus there are no official users of this yet, so we can add this field in the middle of sctp_info without breaking user API. v1->v2: - move 'sctpi_s_type' field to the end of struct sctp_info, so that it won't cause incompatibility with applications already built. - add __reserved3 in sctp_info to make sure sctp_info is 8-byte aligned. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S.
Miller --- include/linux/sctp.h | 2 ++ net/sctp/socket.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index dacb5e711994..de1f64318fc4 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -765,6 +765,8 @@ struct sctp_info { __u8 sctpi_s_disable_fragments; __u8 sctpi_s_v4mapped; __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; }; struct sctp_infox { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 777d0324594a..67154b848aa9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4220,6 +4220,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_s_disable_fragments = sp->disable_fragments; info->sctpi_s_v4mapped = sp->v4mapped; info->sctpi_s_frag_interleave = sp->frag_interleave; + info->sctpi_s_type = sp->type; return 0; } -- cgit v1.2.3