summaryrefslogtreecommitdiff
path: root/net/core/drop_monitor.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-03 17:10:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-03 17:10:39 -0700
commitc42f1d4b523950c4af060f8fc0c7016755d8a3bc (patch)
tree8322c9c14b0c7055ce924d12c1150d2acb0ea95e /net/core/drop_monitor.c
parent913a90416918a591e6d5ece036b795c58a08131d (diff)
parent5a8887d39e1ba5ee2d4ccb94b14d6f2dce5ddfca (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Transfer padding was wrong for full-speed USB in ASIX driver, fix from Ingo van Lil. 2) Propagate the negative packet offset fix into the PowerPC BPF JIT. From Jan Seiffert. 3) dl2k driver's private ioctls were letting unprivileged tasks make MII writes and other ugly bits like that. Fix from Jeff Mahoney. 4) Fix TX VLAN and RX packet drops in ucc_geth, from Joakim Tjernlund. 5) OOPS and network namespace fixes in IPVS from Hans Schillstrom and Julian Anastasov. 6) Fix races and sleeping in locked context bugs in drop_monitor, from Neil Horman. 7) Fix link status indication in smsc95xx driver, from Paolo Pisati. 8) Fix bridge netfilter OOPS, from Peter Huang. 9) L2TP sendmsg can return on error conditions with the socket lock held, oops. Fix from Sasha Levin. 10) udp_diag should return meaningful values for socket memory usage, from Shan Wei. 11) Eric Dumazet is so awesome he gets his own section: Socket memory cgroup code (I never should have applied those patches, grumble...) made erroneous changes to sk_sockets_allocated_read_positive(). It was changed to use percpu_counter_sum_positive (which requires BH disabling) instead of percpu_counter_read_positive (which does not). Revert back to avoid crashes and lockdep warnings. Adjust the default tcp_adv_win_scale and tcp_rmem[2] values to fix throughput regressions. This is necessary as a result of our more precise skb->truesize tracking. Fix SKB leak in netem packet scheduler. 12) New device IDs for various bluetooth devices, from Manoj Iyer, AceLan Kao, and Steven Harms. 13) Fix command completion race in ipw2200, from Stanislav Yakovlev. 14) Fix rtlwifi oops on unload, from Larry Finger. 15) Fix hard_mtu when adjusting hard_header_len in smsc95xx driver. From Stephane Fillod. 16) ehea driver registers it's IRQ before all the necessary state is setup, resulting in crashes. Fix from Thadeu Lima de Souza Cascardo. 17) Fix PHY connection failures in davinci_emac driver, from Anatolij Gustschin. 18) Missing break; in switch statement in bluetooth's hci_cmd_complete_evt(). Fix from Szymon Janc. 19) Fix queue programming in iwlwifi, from Johannes Berg. 20) Interrupt throttling defaults not being actually programmed into the hardware, fix from Jeff Kirsher and Ying Cai. 21) TLAN driver SKB encoding in descriptor busted on 64-bit, fix from Benjamin Poirier. 22) Fix blind status block RX producer pointer deref in TG3 driver, from Matt Carlson. 23) Promisc and multicast are busted on ehea, fixes from Thadeu Lima de Souza Cascardo. 24) Fix crashes in 6lowpan, from Alexander Smirnov. 25) tcp_complete_cwr() needs to be careful to not rewind the CWND to ssthresh if ssthresh has the "infinite" value. Fix from Yuchung Cheng. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (81 commits) sungem: Fix WakeOnLan tcp: change tcp_adv_win_scale and tcp_rmem[2] net: l2tp: unlock socket lock before returning from l2tp_ip_sendmsg drop_monitor: prevent init path from scheduling on the wrong cpu usbnet: fix failure handling in usbnet_probe usbnet: fix leak of transfer buffer of dev->interrupt ucc_geth: Add 16 bytes to max TX frame for VLANs net: ucc_geth, increase no. of HW RX descriptors netem: fix possible skb leak sky2: fix receive length error in mixed non-VLAN/VLAN traffic sky2: propogate rx hash when packet is copied net: fix two typos in skbuff.h cxgb3: Don't call cxgb_vlan_mode until q locks are initialized ixgbe: fix calling skb_put on nonlinear skb assertion bug ixgbe: Fix a memory leak in IEEE DCB igbvf: fix the bug when initializing the igbvf smsc75xx: enable mac to detect speed/duplex from phy smsc75xx: declare smsc75xx's MII as GMII capable smsc75xx: fix phy interrupt acknowledge smsc75xx: fix phy init reset loop ...
Diffstat (limited to 'net/core/drop_monitor.c')
-rw-r--r--net/core/drop_monitor.c88
1 files changed, 64 insertions, 24 deletions
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5c3c81a609e..a7cad741df0 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -42,13 +42,14 @@ static void send_dm_alert(struct work_struct *unused);
* netlink alerts
*/
static int trace_state = TRACE_OFF;
-static DEFINE_SPINLOCK(trace_state_lock);
+static DEFINE_MUTEX(trace_state_mutex);
struct per_cpu_dm_data {
struct work_struct dm_alert_work;
- struct sk_buff *skb;
+ struct sk_buff __rcu *skb;
atomic_t dm_hit_count;
struct timer_list send_timer;
+ int cpu;
};
struct dm_hw_stat_delta {
@@ -79,29 +80,53 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
size_t al;
struct net_dm_alert_msg *msg;
struct nlattr *nla;
+ struct sk_buff *skb;
+ struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
al = sizeof(struct net_dm_alert_msg);
al += dm_hit_limit * sizeof(struct net_dm_drop_point);
al += sizeof(struct nlattr);
- data->skb = genlmsg_new(al, GFP_KERNEL);
- genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
- 0, NET_DM_CMD_ALERT);
- nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
- msg = nla_data(nla);
- memset(msg, 0, al);
- atomic_set(&data->dm_hit_count, dm_hit_limit);
+ skb = genlmsg_new(al, GFP_KERNEL);
+
+ if (skb) {
+ genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+ 0, NET_DM_CMD_ALERT);
+ nla = nla_reserve(skb, NLA_UNSPEC,
+ sizeof(struct net_dm_alert_msg));
+ msg = nla_data(nla);
+ memset(msg, 0, al);
+ } else
+ schedule_work_on(data->cpu, &data->dm_alert_work);
+
+ /*
+ * Don't need to lock this, since we are guaranteed to only
+ * run this on a single cpu at a time.
+ * Note also that we only update data->skb if the old and new skb
+ * pointers don't match. This ensures that we don't continually call
+ * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
+ */
+ if (skb != oskb) {
+ rcu_assign_pointer(data->skb, skb);
+
+ synchronize_rcu();
+
+ atomic_set(&data->dm_hit_count, dm_hit_limit);
+ }
+
}
static void send_dm_alert(struct work_struct *unused)
{
struct sk_buff *skb;
- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+ struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+
+ WARN_ON_ONCE(data->cpu != smp_processor_id());
/*
* Grab the skb we're about to send
*/
- skb = data->skb;
+ skb = rcu_dereference_protected(data->skb, 1);
/*
* Replace it with a new one
@@ -111,8 +136,10 @@ static void send_dm_alert(struct work_struct *unused)
/*
* Ship it!
*/
- genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+ if (skb)
+ genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+ put_cpu_var(dm_cpu_data);
}
/*
@@ -123,9 +150,11 @@ static void send_dm_alert(struct work_struct *unused)
*/
static void sched_send_work(unsigned long unused)
{
- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+ struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+
+ schedule_work_on(smp_processor_id(), &data->dm_alert_work);
- schedule_work(&data->dm_alert_work);
+ put_cpu_var(dm_cpu_data);
}
static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -134,8 +163,15 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
struct nlmsghdr *nlh;
struct nlattr *nla;
int i;
- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+ struct sk_buff *dskb;
+ struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+
+
+ rcu_read_lock();
+ dskb = rcu_dereference(data->skb);
+ if (!dskb)
+ goto out;
if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
/*
@@ -144,7 +180,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
goto out;
}
- nlh = (struct nlmsghdr *)data->skb->data;
+ nlh = (struct nlmsghdr *)dskb->data;
nla = genlmsg_data(nlmsg_data(nlh));
msg = nla_data(nla);
for (i = 0; i < msg->entries; i++) {
@@ -158,7 +194,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
/*
* We need to create a new entry
*/
- __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+ __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
msg->points[msg->entries].count = 1;
@@ -170,6 +206,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
}
out:
+ rcu_read_unlock();
+ put_cpu_var(dm_cpu_data);
return;
}
@@ -214,7 +252,7 @@ static int set_all_monitor_traces(int state)
struct dm_hw_stat_delta *new_stat = NULL;
struct dm_hw_stat_delta *temp;
- spin_lock(&trace_state_lock);
+ mutex_lock(&trace_state_mutex);
if (state == trace_state) {
rc = -EAGAIN;
@@ -253,7 +291,7 @@ static int set_all_monitor_traces(int state)
rc = -EINPROGRESS;
out_unlock:
- spin_unlock(&trace_state_lock);
+ mutex_unlock(&trace_state_mutex);
return rc;
}
@@ -296,12 +334,12 @@ static int dropmon_net_event(struct notifier_block *ev_block,
new_stat->dev = dev;
new_stat->last_rx = jiffies;
- spin_lock(&trace_state_lock);
+ mutex_lock(&trace_state_mutex);
list_add_rcu(&new_stat->list, &hw_stats_list);
- spin_unlock(&trace_state_lock);
+ mutex_unlock(&trace_state_mutex);
break;
case NETDEV_UNREGISTER:
- spin_lock(&trace_state_lock);
+ mutex_lock(&trace_state_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
new_stat->dev = NULL;
@@ -312,7 +350,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
}
}
}
- spin_unlock(&trace_state_lock);
+ mutex_unlock(&trace_state_mutex);
break;
}
out:
@@ -368,13 +406,15 @@ static int __init init_net_drop_monitor(void)
for_each_present_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
- reset_per_cpu_data(data);
+ data->cpu = cpu;
INIT_WORK(&data->dm_alert_work, send_dm_alert);
init_timer(&data->send_timer);
data->send_timer.data = cpu;
data->send_timer.function = sched_send_work;
+ reset_per_cpu_data(data);
}
+
goto out;
out_unreg: