diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-03 17:10:39 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-03 17:10:39 -0700 |
| commit | c42f1d4b523950c4af060f8fc0c7016755d8a3bc (patch) | |
| tree | 8322c9c14b0c7055ce924d12c1150d2acb0ea95e /net/core/drop_monitor.c | |
| parent | 913a90416918a591e6d5ece036b795c58a08131d (diff) | |
| parent | 5a8887d39e1ba5ee2d4ccb94b14d6f2dce5ddfca (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Transfer padding was wrong for full-speed USB in ASIX driver, fix
from Ingo van Lil.
2) Propagate the negative packet offset fix into the PowerPC BPF JIT.
From Jan Seiffert.
3) dl2k driver's private ioctls were letting unprivileged tasks make
MII writes and other ugly bits like that. Fix from Jeff Mahoney.
4) Fix TX VLAN and RX packet drops in ucc_geth, from Joakim Tjernlund.
5) OOPS and network namespace fixes in IPVS from Hans Schillstrom and
Julian Anastasov.
6) Fix races and sleeping in locked context bugs in drop_monitor, from
Neil Horman.
7) Fix link status indication in smsc95xx driver, from Paolo Pisati.
8) Fix bridge netfilter OOPS, from Peter Huang.
9) L2TP sendmsg can return on error conditions with the socket lock
held, oops. Fix from Sasha Levin.
10) udp_diag should return meaningful values for socket memory usage,
from Shan Wei.
11) Eric Dumazet is so awesome he gets his own section:
Socket memory cgroup code (I never should have applied those
patches, grumble...) made erroneous changes to
sk_sockets_allocated_read_positive(). It was changed to
use percpu_counter_sum_positive (which requires BH disabling)
instead of percpu_counter_read_positive (which does not).
Revert back to avoid crashes and lockdep warnings.
Adjust the default tcp_adv_win_scale and tcp_rmem[2] values
to fix throughput regressions. This is necessary as a result
of our more precise skb->truesize tracking.
Fix SKB leak in netem packet scheduler.
12) New device IDs for various bluetooth devices, from Manoj Iyer,
AceLan Kao, and Steven Harms.
13) Fix command completion race in ipw2200, from Stanislav Yakovlev.
14) Fix rtlwifi oops on unload, from Larry Finger.
15) Fix hard_mtu when adjusting hard_header_len in smsc95xx driver.
From Stephane Fillod.
16) ehea driver registers it's IRQ before all the necessary state is
setup, resulting in crashes. Fix from Thadeu Lima de Souza
Cascardo.
17) Fix PHY connection failures in davinci_emac driver, from Anatolij
Gustschin.
18) Missing break; in switch statement in bluetooth's
hci_cmd_complete_evt(). Fix from Szymon Janc.
19) Fix queue programming in iwlwifi, from Johannes Berg.
20) Interrupt throttling defaults not being actually programmed into the
hardware, fix from Jeff Kirsher and Ying Cai.
21) TLAN driver SKB encoding in descriptor busted on 64-bit, fix from
Benjamin Poirier.
22) Fix blind status block RX producer pointer deref in TG3 driver, from
Matt Carlson.
23) Promisc and multicast are busted on ehea, fixes from Thadeu Lima de
Souza Cascardo.
24) Fix crashes in 6lowpan, from Alexander Smirnov.
25) tcp_complete_cwr() needs to be careful to not rewind the CWND to
ssthresh if ssthresh has the "infinite" value. Fix from Yuchung
Cheng.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (81 commits)
sungem: Fix WakeOnLan
tcp: change tcp_adv_win_scale and tcp_rmem[2]
net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg
drop_monitor: prevent init path from scheduling on the wrong cpu
usbnet: fix failure handling in usbnet_probe
usbnet: fix leak of transfer buffer of dev->interrupt
ucc_geth: Add 16 bytes to max TX frame for VLANs
net: ucc_geth, increase no. of HW RX descriptors
netem: fix possible skb leak
sky2: fix receive length error in mixed non-VLAN/VLAN traffic
sky2: propogate rx hash when packet is copied
net: fix two typos in skbuff.h
cxgb3: Don't call cxgb_vlan_mode until q locks are initialized
ixgbe: fix calling skb_put on nonlinear skb assertion bug
ixgbe: Fix a memory leak in IEEE DCB
igbvf: fix the bug when initializing the igbvf
smsc75xx: enable mac to detect speed/duplex from phy
smsc75xx: declare smsc75xx's MII as GMII capable
smsc75xx: fix phy interrupt acknowledge
smsc75xx: fix phy init reset loop
...
Diffstat (limited to 'net/core/drop_monitor.c')
| -rw-r--r-- | net/core/drop_monitor.c | 88 |
1 files changed, 64 insertions, 24 deletions
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 5c3c81a609e..a7cad741df0 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -42,13 +42,14 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_SPINLOCK(trace_state_lock); +static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { struct work_struct dm_alert_work; - struct sk_buff *skb; + struct sk_buff __rcu *skb; atomic_t dm_hit_count; struct timer_list send_timer; + int cpu; }; struct dm_hw_stat_delta { @@ -79,29 +80,53 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data) size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; + struct sk_buff *skb; + struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); - data->skb = genlmsg_new(al, GFP_KERNEL); - genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - atomic_set(&data->dm_hit_count, dm_hit_limit); + skb = genlmsg_new(al, GFP_KERNEL); + + if (skb) { + genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); + memset(msg, 0, al); + } else + schedule_work_on(data->cpu, &data->dm_alert_work); + + /* + * Don't need to lock this, since we are guaranteed to only + * run this on a single cpu at a time. + * Note also that we only update data->skb if the old and new skb + * pointers don't match. This ensures that we don't continually call + * synchornize_rcu if we repeatedly fail to alloc a new netlink message. + */ + if (skb != oskb) { + rcu_assign_pointer(data->skb, skb); + + synchronize_rcu(); + + atomic_set(&data->dm_hit_count, dm_hit_limit); + } + } static void send_dm_alert(struct work_struct *unused) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + WARN_ON_ONCE(data->cpu != smp_processor_id()); /* * Grab the skb we're about to send */ - skb = data->skb; + skb = rcu_dereference_protected(data->skb, 1); /* * Replace it with a new one @@ -111,8 +136,10 @@ static void send_dm_alert(struct work_struct *unused) /* * Ship it! */ - genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + if (skb) + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + put_cpu_var(dm_cpu_data); } /* @@ -123,9 +150,11 @@ static void send_dm_alert(struct work_struct *unused) */ static void sched_send_work(unsigned long unused) { - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + schedule_work_on(smp_processor_id(), &data->dm_alert_work); - schedule_work(&data->dm_alert_work); + put_cpu_var(dm_cpu_data); } static void trace_drop_common(struct sk_buff *skb, void *location) @@ -134,8 +163,15 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlmsghdr *nlh; struct nlattr *nla; int i; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct sk_buff *dskb; + struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); + + + rcu_read_lock(); + dskb = rcu_dereference(data->skb); + if (!dskb) + goto out; if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { /* @@ -144,7 +180,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } - nlh = (struct nlmsghdr *)data->skb->data; + nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { @@ -158,7 +194,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) /* * We need to create a new entry */ - __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; @@ -170,6 +206,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) } out: + rcu_read_unlock(); + put_cpu_var(dm_cpu_data); return; } @@ -214,7 +252,7 @@ static int set_all_monitor_traces(int state) struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *temp; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); if (state == trace_state) { rc = -EAGAIN; @@ -253,7 +291,7 @@ static int set_all_monitor_traces(int state) rc = -EINPROGRESS; out_unlock: - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); return rc; } @@ -296,12 +334,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; case NETDEV_UNREGISTER: - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -312,7 +350,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; } out: @@ -368,13 +406,15 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - reset_per_cpu_data(data); + data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); data->send_timer.data = cpu; data->send_timer.function = sched_send_work; + reset_per_cpu_data(data); } + goto out; out_unreg: |
