diff options
Diffstat (limited to 'net')
223 files changed, 12361 insertions, 4051 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig index be33d27c8e69..80d4bf78905d 100644 --- a/net/802/Kconfig +++ b/net/802/Kconfig @@ -5,3 +5,6 @@ config STP config GARP tristate select STP + +config MRP + tristate diff --git a/net/802/Makefile b/net/802/Makefile index a30d6e385aed..37e654d6615e 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o +obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/mrp.c b/net/802/mrp.c new file mode 100644 index 000000000000..a4cc3229952a --- /dev/null +++ b/net/802/mrp.c @@ -0,0 +1,895 @@ +/* + * IEEE 802.1Q Multiple Registration Protocol (MRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/802/garp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <net/mrp.h> +#include <asm/unaligned.h> + +static unsigned int mrp_join_time __read_mostly = 200; +module_param(mrp_join_time, uint, 0644); +MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const u8 +mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { + [MRP_APPLICANT_VO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, + }, + [MRP_APPLICANT_VP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_AA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, + }, + [MRP_APPLICANT_VN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_AN, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, + }, + [MRP_APPLICANT_AN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, + }, + [MRP_APPLICANT_AA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_QA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_LA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, + }, + [MRP_APPLICANT_AO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_AO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, + }, + [MRP_APPLICANT_QO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, + }, + [MRP_APPLICANT_AP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, + [MRP_APPLICANT_QP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QP, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, +}; + +static const u8 +mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { + [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, + [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, +}; + +static void mrp_attrvalue_inc(void *value, u8 len) +{ + u8 *v = (u8 *)value; + + /* Add 1 to the last byte. If it becomes zero, + * go to the previous byte and repeat. + */ + while (len > 0 && !++v[--len]) + ; +} + +static int mrp_attr_cmp(const struct mrp_attr *attr, + const void *value, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->len != len) + return attr->len - len; + return memcmp(attr->value, value, len); +} + +static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + parent = parent->rb_left; + else if (d < 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = NULL, **p = &app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + p = &parent->rb_left; + else if (d < 0) + p = &parent->rb_right; + else { + /* The attribute already exists; re-use it. */ + return attr; + } + } + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = MRP_APPLICANT_VO; + attr->type = type; + attr->len = len; + memcpy(attr->value, value, len); + + rb_link_node(&attr->node, parent, p); + rb_insert_color(&attr->node, &app->mad); + return attr; +} + +static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) +{ + rb_erase(&attr->node, &app->mad); + kfree(attr); +} + +static int mrp_pdu_init(struct mrp_applicant *app) +{ + struct sk_buff *skb; + struct mrp_pdu_hdr *ph; + + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = app->app->pkttype.type; + skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph)); + ph->version = app->app->version; + + app->pdu = skb; + return 0; +} + +static int mrp_pdu_append_end_mark(struct mrp_applicant *app) +{ + __be16 *endmark; + + if (skb_tailroom(app->pdu) < sizeof(*endmark)) + return -1; + endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark)); + put_unaligned(MRP_END_MARK, endmark); + return 0; +} + +static void mrp_pdu_queue(struct mrp_applicant *app) +{ + if (!app->pdu) + return; + + if (mrp_cb(app->pdu)->mh) + mrp_pdu_append_end_mark(app); + mrp_pdu_append_end_mark(app); + + dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), + app->app->group_address, app->dev->dev_addr, + app->pdu->len); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void mrp_queue_xmit(struct mrp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, + u8 attrtype, u8 attrlen) +{ + struct mrp_msg_hdr *mh; + + if (mrp_cb(app->pdu)->mh) { + if (mrp_pdu_append_end_mark(app) < 0) + return -1; + mrp_cb(app->pdu)->mh = NULL; + mrp_cb(app->pdu)->vah = NULL; + } + + if (skb_tailroom(app->pdu) < sizeof(*mh)) + return -1; + mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh)); + mh->attrtype = attrtype; + mh->attrlen = attrlen; + mrp_cb(app->pdu)->mh = mh; + return 0; +} + +static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, + const void *firstattrvalue, u8 attrlen) +{ + struct mrp_vecattr_hdr *vah; + + if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) + return -1; + vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, + sizeof(*vah) + attrlen); + put_unaligned(0, &vah->lenflags); + memcpy(vah->firstattrvalue, firstattrvalue, attrlen); + mrp_cb(app->pdu)->vah = vah; + memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); + return 0; +} + +static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, + const struct mrp_attr *attr, + enum mrp_vecattr_event vaevent) +{ + u16 len, pos; + u8 *vaevents; + int err; +again: + if (!app->pdu) { + err = mrp_pdu_init(app); + if (err < 0) + return err; + } + + /* If there is no Message header in the PDU, or the Message header is + * for a different attribute type, add an EndMark (if necessary) and a + * new Message header to the PDU. + */ + if (!mrp_cb(app->pdu)->mh || + mrp_cb(app->pdu)->mh->attrtype != attr->type || + mrp_cb(app->pdu)->mh->attrlen != attr->len) { + if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) + goto queue; + } + + /* If there is no VectorAttribute header for this Message in the PDU, + * or this attribute's value does not sequentially follow the previous + * attribute's value, add a new VectorAttribute header to the PDU. + */ + if (!mrp_cb(app->pdu)->vah || + memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { + if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) + goto queue; + } + + len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); + pos = len % 3; + + /* Events are packed into Vectors in the PDU, three to a byte. Add a + * byte to the end of the Vector if necessary. + */ + if (!pos) { + if (skb_tailroom(app->pdu) < sizeof(u8)) + goto queue; + vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8)); + } else { + vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); + } + + switch (pos) { + case 0: + *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + break; + case 1: + *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; + break; + case 2: + *vaevents += vaevent; + break; + default: + WARN_ON(1); + } + + /* Increment the length of the VectorAttribute in the PDU, as well as + * the value of the next attribute that would continue its Vector. + */ + put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); + mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); + + return 0; + +queue: + mrp_pdu_queue(app); + goto again; +} + +static void mrp_attr_event(struct mrp_applicant *app, + struct mrp_attr *attr, enum mrp_event event) +{ + enum mrp_applicant_state state; + + state = mrp_applicant_state_table[attr->state][event]; + if (state == MRP_APPLICANT_INVALID) { + WARN_ON(1); + return; + } + + if (event == MRP_EVENT_TX) { + /* When appending the attribute fails, don't update its state + * in order to retry at the next TX event. + */ + + switch (mrp_tx_action_table[attr->state]) { + case MRP_TX_ACTION_NONE: + case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: + case MRP_TX_ACTION_S_IN_OPTIONAL: + break; + case MRP_TX_ACTION_S_NEW: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_NEW) < 0) + return; + break; + case MRP_TX_ACTION_S_JOIN_IN: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) + return; + break; + case MRP_TX_ACTION_S_LV: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_LV) < 0) + return; + /* As a pure applicant, sending a leave message + * implies that the attribute was unregistered and + * can be destroyed. + */ + mrp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + } + + attr->state = state; +} + +int mrp_request_join(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return -ENOMEM; + + spin_lock_bh(&app->lock); + attr = mrp_attr_create(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + mrp_attr_event(app, attr, MRP_EVENT_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_request_join); + +void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return; + + spin_lock_bh(&app->lock); + attr = mrp_attr_lookup(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + mrp_attr_event(app, attr, MRP_EVENT_LV); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(mrp_request_leave); + +static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_event(app, attr, event); + } +} + +static void mrp_join_timer_arm(struct mrp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void mrp_join_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + mrp_join_timer_arm(app); +} + +static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +{ + __be16 endmark; + + if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) + return -1; + if (endmark == MRP_END_MARK) { + *offset += sizeof(endmark); + return -1; + } + return 0; +} + +static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, + struct sk_buff *skb, + enum mrp_vecattr_event vaevent) +{ + struct mrp_attr *attr; + enum mrp_event event; + + attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen, + mrp_cb(skb)->mh->attrtype); + if (attr == NULL) + return; + + switch (vaevent) { + case MRP_VECATTR_EVENT_NEW: + event = MRP_EVENT_R_NEW; + break; + case MRP_VECATTR_EVENT_JOIN_IN: + event = MRP_EVENT_R_JOIN_IN; + break; + case MRP_VECATTR_EVENT_IN: + event = MRP_EVENT_R_IN; + break; + case MRP_VECATTR_EVENT_JOIN_MT: + event = MRP_EVENT_R_JOIN_MT; + break; + case MRP_VECATTR_EVENT_MT: + event = MRP_EVENT_R_MT; + break; + case MRP_VECATTR_EVENT_LV: + event = MRP_EVENT_R_LV; + break; + default: + return; + } + + mrp_attr_event(app, attr, event); +} + +static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, + struct sk_buff *skb, int *offset) +{ + struct mrp_vecattr_hdr _vah; + u16 valen; + u8 vaevents, vaevent; + + mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah), + &_vah); + if (!mrp_cb(skb)->vah) + return -1; + *offset += sizeof(_vah); + + if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_FLAG_LA) + mrp_mad_event(app, MRP_EVENT_R_LA); + valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_LEN_MASK); + + /* The VectorAttribute structure in a PDU carries event information + * about one or more attributes having consecutive values. Only the + * value for the first attribute is contained in the structure. So + * we make a copy of that value, and then increment it each time we + * advance to the next event in its Vector. + */ + if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > + FIELD_SIZEOF(struct sk_buff, cb)) + return -1; + if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen) < 0) + return -1; + *offset += mrp_cb(skb)->mh->attrlen; + + /* In a VectorAttribute, the Vector contains events which are packed + * three to a byte. We process one byte of the Vector at a time. + */ + while (valen > 0) { + if (skb_copy_bits(skb, *offset, &vaevents, + sizeof(vaevents)) < 0) + return -1; + *offset += sizeof(vaevents); + + /* Extract and process the first event. */ + vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + if (vaevent >= __MRP_VECATTR_EVENT_MAX) { + /* The byte is malformed; stop processing. */ + return -1; + } + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the second event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the third event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= __MRP_VECATTR_EVENT_MAX; + vaevent = vaevents; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + } + return 0; +} + +static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, + int *offset) +{ + struct mrp_msg_hdr _mh; + + mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); + if (!mrp_cb(skb)->mh) + return -1; + *offset += sizeof(_mh); + + if (mrp_cb(skb)->mh->attrtype == 0 || + mrp_cb(skb)->mh->attrtype > app->app->maxattr || + mrp_cb(skb)->mh->attrlen == 0) + return -1; + + while (skb->len > *offset) { + if (mrp_pdu_parse_end_mark(skb, offset) < 0) + break; + if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) + return -1; + } + return 0; +} + +static int mrp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct mrp_application *appl = container_of(pt, struct mrp_application, + pkttype); + struct mrp_port *port; + struct mrp_applicant *app; + struct mrp_pdu_hdr _ph; + const struct mrp_pdu_hdr *ph; + int offset = skb_network_offset(skb); + + /* If the interface is in promiscuous mode, drop the packet if + * it was unicast to another host. + */ + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) + goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto out; + port = rcu_dereference(dev->mrp_port); + if (unlikely(!port)) + goto out; + app = rcu_dereference(port->applicants[appl->type]); + if (unlikely(!app)) + goto out; + + ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); + if (!ph) + goto out; + offset += sizeof(_ph); + + if (ph->version != app->app->version) + goto out; + + spin_lock(&app->lock); + while (skb->len > offset) { + if (mrp_pdu_parse_end_mark(skb, &offset) < 0) + break; + if (mrp_pdu_parse_msg(app, skb, &offset) < 0) + break; + } + spin_unlock(&app->lock); +out: + kfree_skb(skb); + return 0; +} + +static int mrp_init_port(struct net_device *dev) +{ + struct mrp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->mrp_port, port); + return 0; +} + +static void mrp_release_port(struct net_device *dev) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + unsigned int i; + + for (i = 0; i <= MRP_APPLICATION_MAX; i++) { + if (rtnl_dereference(port->applicants[i])) + return; + } + RCU_INIT_POINTER(dev->mrp_port, NULL); + kfree_rcu(port, rcu); +} + +int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!rtnl_dereference(dev->mrp_port)) { + err = mrp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->group_address); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + mrp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + mrp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(mrp_init_applicant); + +void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + + ASSERT_RTNL(); + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. + */ + del_timer_sync(&app->join_timer); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + mrp_queue_xmit(app); + + dev_mc_del(dev, appl->group_address); + kfree_rcu(app, rcu); + mrp_release_port(dev); +} +EXPORT_SYMBOL_GPL(mrp_uninit_applicant); + +int mrp_register_application(struct mrp_application *appl) +{ + appl->pkttype.func = mrp_rcv; + dev_add_pack(&appl->pkttype); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_register_application); + +void mrp_unregister_application(struct mrp_application *appl) +{ + dev_remove_pack(&appl->pkttype); +} +EXPORT_SYMBOL_GPL(mrp_unregister_application); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963e..8f7517df41a5 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1c..7bc8db08d7ef 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index babfde9f734c..a18714469bf7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -107,8 +109,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -117,19 +121,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { const char *name = real_dev->name; - const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("Device %s has buggy VLAN hw accel\n", name); - return -EOPNOTSUPP; - } - if (vlan_find_dev(real_dev, vlan_id) != NULL) return -EEXIST; @@ -158,15 +155,18 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev); if (err) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) @@ -188,7 +188,10 @@ int register_vlan_dev(struct net_device *dev) out_upper_dev_unlink: netdev_upper_dev_unlink(real_dev, dev); -out_uninit_applicant: +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -662,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -689,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40c..670f1e8cfc0f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 380440b8ea89..f3b6f515eba6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -144,6 +144,7 @@ err_free: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(vlan_untag); /* @@ -224,8 +225,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, if (!vid_info) return -ENOMEM; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_add_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_add_vid(dev, vid); if (err) { kfree(vid_info); @@ -282,8 +282,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, unsigned short vid = vid_info->vid; int err; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_kill_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_kill_vid(dev, vid); if (err) { pr_warn("failed to kill vid %d for device %s\n", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09f9108d4688..19cf81bf9f69 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; @@ -723,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_rcu(netpoll); + __netpoll_free_async(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 000000000000..d9ec1d5964aa --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/mrp.h> +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea1874..1789658b7cd7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } diff --git a/net/Kconfig b/net/Kconfig index 3cc5be0fe420..5a1888bb036d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -209,7 +209,6 @@ source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" -source "net/wanrouter/Kconfig" source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/mac802154/Kconfig" @@ -218,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 4f4ee083064c..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ -obj-$(CONFIG_WAN_ROUTER) += wanrouter/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ obj-$(CONFIG_NETROM) += netrom/ @@ -70,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 7485a78484ff..ea0bd31d41c2 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -738,6 +738,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct arphdr *arphdr; struct ethhdr *ethhdr; __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; uint16_t type = 0; /* pull the ethernet header */ @@ -777,9 +778,23 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, ip_src = batadv_arp_ip_src(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) || - ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst)) + ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst) || + ipv4_is_zeronet(ip_src) || ipv4_is_lbcast(ip_src) || + ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst)) goto out; + hw_src = batadv_arp_hw_src(skb, hdr_size); + if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) + goto out; + + /* we don't care about the destination MAC address in ARP requests */ + if (arphdr->ar_op != htons(ARPOP_REQUEST)) { + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + if (is_zero_ether_addr(hw_dst) || + is_multicast_ether_addr(hw_dst)) + goto out; + } + type = ntohs(arphdr->ar_op); out: return type; @@ -1012,6 +1027,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, */ ret = !batadv_is_my_client(bat_priv, hw_dst); out: + if (ret) + kfree_skb(skb); /* if ret == false -> packet has to be delivered to the interface */ return ret; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index fb15b4c076f7..d44672f4a349 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -517,8 +517,8 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), - no_purge ? last_seen_secs : 0, - no_purge ? last_seen_msecs : 0); + no_purge ? 0 : last_seen_secs, + no_purge ? 0 : last_seen_msecs); } rcu_read_unlock(); } diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 2f67d5ecc907..eb0f4b16ff09 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -290,7 +290,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb, goto done; } - mgr->state = READ_LOC_AMP_INFO; + set_bit(READ_LOC_AMP_INFO, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); done: @@ -499,8 +499,16 @@ send_rsp: if (hdev) hci_dev_put(hdev); - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, sizeof(rsp), - &rsp); + /* Reply error now and success after HCI Write Remote AMP Assoc + command complete with success status + */ + if (rsp.status != A2MP_STATUS_SUCCESS) { + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, + sizeof(rsp), &rsp); + } else { + set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state); + mgr->ident = hdr->ident; + } skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -840,7 +848,7 @@ struct amp_mgr *amp_mgr_lookup_by_state(u8 state) mutex_lock(&_mgr_list_lock); list_for_each_entry(mgr, &_mgr_list, list) { - if (mgr->state == state) { + if (test_and_clear_bit(state, &mgr->state)) { amp_mgr_get(mgr); mutex_unlock(&_mgr_list_lock); return mgr; @@ -949,6 +957,32 @@ clean: kfree(req); } +void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status) +{ + struct amp_mgr *mgr; + struct a2mp_physlink_rsp rsp; + struct hci_conn *hs_hcon; + + mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC); + if (!mgr) + return; + + hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT); + if (!hs_hcon) { + rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; + } else { + rsp.remote_id = hs_hcon->remote_id; + rsp.status = A2MP_STATUS_SUCCESS; + } + + BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon, + status); + + rsp.local_id = hdev->id; + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp); + amp_mgr_put(mgr); +} + void a2mp_discover_amp(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 1b0d92c0643a..d459ed43c779 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -236,7 +236,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC; + set_bit(READ_LOC_AMP_ASSOC, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); } @@ -250,7 +250,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC_FINAL; + set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state); /* Read Local AMP Assoc final link information data */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); @@ -317,7 +317,9 @@ void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle) if (!hcon) return; - amp_write_rem_assoc_frag(hdev, hcon); + /* Send A2MP create phylink rsp when all fragments are written */ + if (amp_write_rem_assoc_frag(hdev, hcon)) + a2mp_send_create_phy_link_rsp(hdev, 0); } void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle) @@ -403,26 +405,20 @@ void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon) void amp_create_logical_link(struct l2cap_chan *chan) { + struct hci_conn *hs_hcon = chan->hs_hcon; struct hci_cp_create_accept_logical_link cp; - struct hci_conn *hcon; struct hci_dev *hdev; - BT_DBG("chan %p", chan); + BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon, chan->conn->dst); - if (!chan->hs_hcon) + if (!hs_hcon) return; hdev = hci_dev_hold(chan->hs_hcon->hdev); if (!hdev) return; - BT_DBG("chan %p dst %pMR", chan, chan->conn->dst); - - hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, chan->conn->dst); - if (!hcon) - goto done; - - cp.phy_handle = hcon->handle; + cp.phy_handle = hs_hcon->handle; cp.tx_flow_spec.id = chan->local_id; cp.tx_flow_spec.stype = chan->local_stype; @@ -438,14 +434,13 @@ void amp_create_logical_link(struct l2cap_chan *chan) cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat); cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to); - if (hcon->out) + if (hs_hcon->out) hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp), &cp); else hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp), &cp); -done: hci_dev_put(hdev); } diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a5b639702637..e430b1abcd2f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -33,7 +33,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#include <net/bluetooth/l2cap.h> #include "bnep.h" diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 25bfce0666eb..4925a02ae7e4 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -249,12 +249,12 @@ static void hci_conn_disconnect(struct hci_conn *conn) __u8 reason = hci_proto_disconn_ind(conn); switch (conn->type) { - case ACL_LINK: - hci_acl_disconn(conn, reason); - break; case AMP_LINK: hci_amp_disconn(conn, reason); break; + default: + hci_acl_disconn(conn, reason); + break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 596660d37c5e..22e77a786545 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,8 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + queue_delayed_work(hdev->req_workqueue, &hdev->power_off, + HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -1182,14 +1183,10 @@ static void hci_discov_off(struct work_struct *work) int hci_uuids_clear(struct hci_dev *hdev) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *uuid; - - uuid = list_entry(p, struct bt_uuid, list); + struct bt_uuid *uuid, *tmp; - list_del(p); + list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { + list_del(&uuid->list); kfree(uuid); } @@ -1621,8 +1618,8 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, if (err < 0) return err; - schedule_delayed_work(&hdev->le_scan_disable, - msecs_to_jiffies(timeout)); + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + msecs_to_jiffies(timeout)); return 0; } @@ -1799,6 +1796,15 @@ int hci_register_dev(struct hci_dev *hdev) goto err; } + hdev->req_workqueue = alloc_workqueue(hdev->name, + WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!hdev->req_workqueue) { + destroy_workqueue(hdev->workqueue); + error = -ENOMEM; + goto err; + } + error = hci_add_sysfs(hdev); if (error < 0) goto err_wqueue; @@ -1821,12 +1827,13 @@ int hci_register_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); return id; err_wqueue: destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); write_lock(&hci_dev_list_lock); @@ -1880,6 +1887,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_del_sysfs(hdev); destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); hci_blacklist_clear(hdev); @@ -2810,14 +2818,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags) && - !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) - mgmt_device_connected(hdev, &conn->dst, conn->type, - conn->dst_type, 0, NULL, 0, - conn->dev_class); - hci_dev_unlock(hdev); - /* Send to upper protocol */ l2cap_recv_acldata(conn, skb, flags); return; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 705078a0cc39..477726a63512 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -609,8 +609,17 @@ static void le_setup(struct hci_dev *hdev) /* Read LE Buffer Size */ hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + /* Read LE Local Supported Features */ + hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + /* Read LE Advertising Channel TX Power */ hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + + /* Read LE White List Size */ + hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + + /* Read LE Supported States */ + hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); } static void hci_setup(struct hci_dev *hdev) @@ -1090,6 +1099,19 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } +static void hci_cc_le_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_local_features *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_features, rp->features, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); +} + static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1290,6 +1312,19 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } } +static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_white_list_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); + + if (!rp->status) + hdev->le_white_list_size = rp->size; + + hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); +} + static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_le_ltk_reply *rp = (void *) skb->data; @@ -1314,6 +1349,19 @@ static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } +static void hci_cc_le_read_supported_states(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_supported_states *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_states, rp->le_states, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2628,6 +2676,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_buffer_size(hdev, skb); break; + case HCI_OP_LE_READ_LOCAL_FEATURES: + hci_cc_le_read_local_features(hdev, skb); + break; + case HCI_OP_LE_READ_ADV_TX_POWER: hci_cc_le_read_adv_tx_power(hdev, skb); break; @@ -2664,6 +2716,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_set_scan_enable(hdev, skb); break; + case HCI_OP_LE_READ_WHITE_LIST_SIZE: + hci_cc_le_read_white_list_size(hdev, skb); + break; + case HCI_OP_LE_LTK_REPLY: hci_cc_le_ltk_reply(hdev, skb); break; @@ -2672,6 +2728,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_ltk_neg_reply(hdev, skb); break; + case HCI_OP_LE_READ_SUPPORTED_STATES: + hci_cc_le_read_supported_states(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -2688,7 +2748,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); - if (ev->ncmd) { + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); @@ -3928,8 +3988,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) void *ptr = &skb->data[1]; s8 rssi; - hci_dev_lock(hdev); - while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; @@ -3939,8 +3997,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) ptr += sizeof(*ev) + ev->length + 1; } - - hci_dev_unlock(hdev); } static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 55cceee02a84..23b4e242a31a 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 @@ #include <linux/debugfs.h> #include <linux/module.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -461,19 +462,18 @@ static const struct file_operations blacklist_fops = { static void print_bt_uuid(struct seq_file *f, u8 *uuid) { - __be32 data0, data4; - __be16 data1, data2, data3, data5; + u32 data0, data5; + u16 data1, data2, data3, data4; - memcpy(&data0, &uuid[0], 4); - memcpy(&data1, &uuid[4], 2); - memcpy(&data2, &uuid[6], 2); - memcpy(&data3, &uuid[8], 2); - memcpy(&data4, &uuid[10], 4); - memcpy(&data5, &uuid[14], 2); + data5 = get_unaligned_le32(uuid); + data4 = get_unaligned_le16(uuid + 4); + data3 = get_unaligned_le16(uuid + 6); + data2 = get_unaligned_le16(uuid + 8); + data1 = get_unaligned_le16(uuid + 10); + data0 = get_unaligned_le32(uuid + 12); - seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", - ntohl(data0), ntohs(data1), ntohs(data2), ntohs(data3), - ntohl(data4), ntohs(data5)); + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.4x%.8x\n", + data0, data1, data2, data3, data4, data5); } static int uuids_show(struct seq_file *f, void *p) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index b2bcbe2dc328..a7352ff3fd1e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -931,7 +931,7 @@ static int hidp_setup_hid(struct hidp_session *session, hid->version = req->version; hid->country = req->country; - strncpy(hid->name, req->name, 128); + strncpy(hid->name, req->name, sizeof(req->name) - 1); snprintf(hid->phys, sizeof(hid->phys), "%pMR", &bt_sk(session->ctrl_sock->sk)->src); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2c78208d793e..7c7e9321f1ea 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1527,17 +1527,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); switch (hcon->type) { - case AMP_LINK: - conn->mtu = hcon->hdev->block_mtu; - break; - case LE_LINK: if (hcon->hdev->le_mtu) { conn->mtu = hcon->hdev->le_mtu; break; } /* fall through */ - default: conn->mtu = hcon->hdev->acl_mtu; break; @@ -3727,6 +3722,17 @@ sendresp: static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { + struct hci_dev *hdev = conn->hcon->hdev; + struct hci_conn *hcon = conn->hcon; + + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->dev_flags) && + !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) + mgmt_device_connected(hdev, &hcon->dst, hcon->type, + hcon->dst_type, 0, NULL, 0, + hcon->dev_class); + hci_dev_unlock(hdev); + l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); return 0; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f559b966279c..39395c7144aa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 2 +#define MGMT_REVISION 3 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -435,35 +435,117 @@ static u32 get_current_settings(struct hci_dev *hdev) #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 bluetooth_base_uuid[] = { - 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; -static u16 get_uuid16(u8 *uuid128) + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) { - u32 val; - int i; + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; - for (i = 0; i < 12; i++) { - if (bluetooth_base_uuid[i] != uuid128[i]) - return 0; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); } - val = get_unaligned_le32(&uuid128[12]); - if (val > 0xffff) - return 0; + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; - return (u16) val; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; } static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; - u16 eir_len = 0; - u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; - int i, truncated = 0; - struct bt_uuid *uuid; size_t name_len; name_len = strlen(hdev->dev_name); @@ -481,7 +563,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) memcpy(ptr + 2, hdev->dev_name, name_len); - eir_len += (name_len + 2); ptr += (name_len + 2); } @@ -490,7 +571,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) ptr[1] = EIR_TX_POWER; ptr[2] = (u8) hdev->inq_tx_power; - eir_len += 3; ptr += 3; } @@ -503,60 +583,12 @@ static void create_eir(struct hci_dev *hdev, u8 *data) put_unaligned_le16(hdev->devid_product, ptr + 6); put_unaligned_le16(hdev->devid_version, ptr + 8); - eir_len += 10; ptr += 10; } - memset(uuid16_list, 0, sizeof(uuid16_list)); - - /* Group all UUID16 types */ - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - uuid16 = get_uuid16(uuid->uuid); - if (uuid16 == 0) - return; - - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - /* Stop if not enough space to put next UUID */ - if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { - truncated = 1; - break; - } - - /* Check for duplicates */ - for (i = 0; uuid16_list[i] != 0; i++) - if (uuid16_list[i] == uuid16) - break; - - if (uuid16_list[i] == 0) { - uuid16_list[i] = uuid16; - eir_len += sizeof(u16); - } - } - - if (uuid16_list[0] != 0) { - u8 *length = ptr; - - /* EIR Data type */ - ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL; - - ptr += 2; - eir_len += 2; - - for (i = 0; uuid16_list[i] != 0; i++) { - *ptr++ = (uuid16_list[i] & 0x00ff); - *ptr++ = (uuid16_list[i] & 0xff00) >> 8; - } - - /* EIR Data length */ - *length = (i * sizeof(u16)) + 1; - } + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } static int update_eir(struct hci_dev *hdev) @@ -728,13 +760,9 @@ static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void *data), void *data) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->mgmt_pending) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); + struct pending_cmd *cmd, *tmp; + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; @@ -777,14 +805,19 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { - err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); - mgmt_powered(hdev, 1); + mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, + data, len); + err = mgmt_powered(hdev, 1); goto failed; } } @@ -807,9 +840,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } if (cp->val) - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); else - schedule_work(&hdev->power_off.work); + queue_work(hdev->req_workqueue, &hdev->power_off.work); err = 0; @@ -872,6 +905,10 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); + timeout = __le16_to_cpu(cp->timeout); if (!cp->val && timeout > 0) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, @@ -971,6 +1008,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1041,6 +1082,10 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (cp->val) @@ -1073,6 +1118,10 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1133,13 +1182,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_ssp_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); - goto failed; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; @@ -1199,6 +1250,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); + if (cp->val) set_bit(HCI_HS_ENABLED, &hdev->dev_flags); else @@ -1217,13 +1272,15 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_le_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); - goto unlock; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; enabled = lmp_host_le_capable(hdev); @@ -1275,6 +1332,25 @@ unlock: return err; } +static const u8 bluetooth_base_uuid[] = { + 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static u8 get_uuid_size(const u8 *uuid) +{ + u32 val; + + if (memcmp(uuid, bluetooth_base_uuid, 12)) + return 128; + + val = get_unaligned_le32(&uuid[12]); + if (val > 0xffff) + return 32; + + return 16; +} + static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; @@ -1300,8 +1376,9 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) memcpy(uuid->uuid, cp->uuid, 16); uuid->svc_hint = cp->svc_hint; + uuid->size = get_uuid_size(cp->uuid); - list_add(&uuid->list, &hdev->uuids); + list_add_tail(&uuid->list, &hdev->uuids); err = update_class(hdev); if (err < 0) @@ -1332,7 +1409,8 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { - schedule_delayed_work(&hdev->service_cache, CACHE_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->service_cache, + CACHE_TIMEOUT); return true; } @@ -1344,7 +1422,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_remove_uuid *cp = data; struct pending_cmd *cmd; - struct list_head *p, *n; + struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int err, found; @@ -1372,9 +1450,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, found = 0; - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *match = list_entry(p, struct bt_uuid, list); - + list_for_each_entry_safe(match, tmp, &hdev->uuids, list) { if (memcmp(match->uuid, cp->uuid, 16) != 0) continue; @@ -1422,13 +1498,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); - goto unlock; - } + if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); + + if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); hdev->major_class = cp->major; hdev->minor_class = cp->minor; @@ -1483,9 +1565,21 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } + if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_link_key_info *key = &cp->keys[i]; + + if (key->addr.type != BDADDR_BREDR) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -1533,12 +1627,22 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + if (cp->disconnect != 0x00 && cp->disconnect != 0x01) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + if (!hdev_is_powered(hdev)) { err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); @@ -1596,6 +1700,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; + struct mgmt_rp_disconnect rp; struct hci_cp_disconnect dc; struct pending_cmd *cmd; struct hci_conn *conn; @@ -1603,17 +1708,26 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto failed; } if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ -1624,8 +1738,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto failed; } @@ -1903,11 +2017,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } @@ -1924,10 +2047,6 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, cp->addr.type, sec_level, auth_type); - memset(&rp, 0, sizeof(rp)); - bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); - rp.addr.type = cp->addr.type; - if (IS_ERR(conn)) { int status; @@ -2254,24 +2373,16 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->hash, cp->randomizer); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2287,24 +2398,15 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_remove_remote_oob_data(hdev, &cp->addr.bdaddr); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2365,31 +2467,45 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - if (lmp_bredr_capable(hdev)) - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); - else - err = -ENOTSUPP; + if (!lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); break; case DISCOV_TYPE_LE: - if (lmp_host_le_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; case DISCOV_TYPE_INTERLEAVED: - if (lmp_host_le_capable(hdev) && lmp_bredr_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, + LE_SCAN_TIMEOUT_BREDR_LE); break; default: - err = -EINVAL; + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); + goto failed; } if (err < 0) @@ -2510,7 +2626,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = 0; + err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, + sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -2526,13 +2643,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2551,13 +2673,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2612,6 +2739,10 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + if (!hdev_is_powered(hdev)) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_POWERED); @@ -2659,12 +2790,23 @@ done: return err; } +static bool ltk_is_valid(struct mgmt_ltk_info *key) +{ + if (key->authenticated != 0x00 && key->authenticated != 0x01) + return false; + if (key->master != 0x00 && key->master != 0x01) + return false; + if (!bdaddr_type_is_le(key->addr.type)) + return false; + return true; +} + static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; u16 key_count, expected_len; - int i; + int i, err; key_count = __le16_to_cpu(cp->key_count); @@ -2674,11 +2816,20 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_ERR("load_keys: expected %u bytes, got %u bytes", len, expected_len); return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_ltk_info *key = &cp->keys[i]; + + if (!ltk_is_valid(key)) + return cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -2698,9 +2849,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->enc_size, key->ediv, key->rand); } + err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + NULL, 0); + hci_dev_unlock(hdev); - return 0; + return err; } static const struct mgmt_handler { @@ -2915,6 +3069,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); if (powered) { + u8 link_sec; + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { u8 ssp = 1; @@ -2938,6 +3094,11 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) sizeof(cp), &cp); } + link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + if (lmp_bredr_capable(hdev)) { set_bredr_scan(hdev); update_class(hdev); @@ -2946,7 +3107,13 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) } } else { u8 status = MGMT_STATUS_NOT_POWERED; + u8 zero_cod[] = { 0, 0, 0 }; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); } err = new_settings(hdev, match.sk); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 201fdf737209..b23e2713fea8 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -257,7 +257,7 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (timer_pending(&s->timer) && del_timer(&s->timer)) + if (del_timer(&s->timer)) rfcomm_session_put(s); } @@ -285,7 +285,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); - if (timer_pending(&d->timer) && del_timer(&d->timer)) + if (del_timer(&d->timer)) rfcomm_dlc_put(d); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 531a93d613d4..b5178d62064e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -352,7 +352,7 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: - if (sco_pi(sk)->conn) { + if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); hci_conn_put(sco_pi(sk)->conn->hcon); @@ -900,8 +900,6 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); - sco_conn_lock(conn); - if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); @@ -909,9 +907,13 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state_change(sk); bh_unlock_sock(sk); } else { + sco_conn_lock(conn); + parent = sco_get_sock_listen(conn->src); - if (!parent) - goto done; + if (!parent) { + sco_conn_unlock(conn); + return; + } bh_lock_sock(parent); @@ -919,7 +921,8 @@ static void sco_conn_ready(struct sco_conn *conn) BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); - goto done; + sco_conn_unlock(conn); + return; } sco_sock_init(sk, parent); @@ -939,10 +942,9 @@ static void sco_conn_ready(struct sco_conn *conn) parent->sk_data_ready(parent, 1); bh_unlock_sock(parent); - } -done: - sco_conn_unlock(conn); + sco_conn_unlock(conn); + } } /* ----- SCO interface with lower layer (HCI) ----- */ diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 68a9587c9694..5abefb12891d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -859,6 +859,19 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(code)); + /* + * The SMP context must be initialized for all other PDUs except + * pairing and security requests. If we get any other PDU when + * not initialized simply disconnect (done if this function + * returns an error). + */ + if (code != SMP_CMD_PAIRING_REQ && code != SMP_CMD_SECURITY_REQ && + !conn->smp_chan) { + BT_ERR("Unexpected SMP command 0x%02x. Disconnecting.", code); + kfree_skb(skb); + return -ENOTSUPP; + } + switch (code) { case SMP_CMD_PAIRING_REQ: reason = smp_cmd_pairing_req(conn, skb); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 6dee7bf648a9..aa0d3b2f1bb7 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -46,3 +46,17 @@ config BRIDGE_IGMP_SNOOPING Say N to exclude this support and reduce the binary size. If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + receive and forward traffic based on VLAN information in the packet + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e859098f5ee9..e85498b2f166 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -14,4 +14,6 @@ bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e1bc090bc00a..d5f1d3fd4b28 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u16 vid = 0; rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER @@ -45,6 +46,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); @@ -67,7 +71,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); - } else if ((dst = __br_fdb_get(br, dest)) != NULL) + } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); @@ -176,7 +180,6 @@ static int br_set_mac_address(struct net_device *dev, void *p) br_fdb_change_mac_address(br, addr->sa_data); br_stp_change_bridge_id(br, addr->sa_data); } - br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -266,7 +269,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } #endif @@ -314,6 +317,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9576e6de2b8..8117900af4de 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,11 +23,12 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <asm/unaligned.h> +#include <linux/if_vlan.h> #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); @@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static inline int br_mac_hash(const unsigned char *mac) +static inline int br_mac_hash(const unsigned char *mac, __u16 vid) { - /* use 1 byte of OUI cnd 3 bytes of NIC */ + /* use 1 byte of OUI and 3 bytes of NIC */ u32 key = get_unaligned((u32 *)(mac + 2)); - return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1); + return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); } static void fdb_rcu_free(struct rcu_head *head) @@ -91,6 +92,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; + bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false; int i; spin_lock_bh(&br->hash_lock); @@ -105,10 +107,12 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) if (f->dst == p && f->is_local) { /* maybe another port has same hw addr? */ struct net_bridge_port *op; + u16 vid = f->vlan_id; list_for_each_entry(op, &br->port_list, list) { if (op != p && ether_addr_equal(op->dev->dev_addr, - f->addr.addr)) { + f->addr.addr) && + nbp_vlan_find(op, vid)) { f->dst = op; goto insert; } @@ -116,27 +120,55 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) /* delete old one */ fdb_delete(br, f); - goto insert; +insert: + /* insert new address, may fail if invalid + * address or dup. + */ + fdb_insert(br, p, newaddr, vid); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (no_vlan) + goto done; } } } - insert: - /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr); +done: spin_unlock_bh(&br->hash_lock); } void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { struct net_bridge_fdb_entry *f; + struct net_port_vlans *pv; + u16 vid = 0; /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr); + f = __br_fdb_get(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst) fdb_delete(br, f); - fdb_insert(br, NULL, newaddr); + fdb_insert(br, NULL, newaddr, 0); + + /* Now remove and add entries for every VLAN configured on the + * bridge. This function runs under RTNL so the bitmap will not + * change from under us. + */ + pv = br_get_vlan_info(br); + if (!pv) + return; + + for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { + f = __br_fdb_get(br, br->dev->dev_addr, vid); + if (f && f->is_local && !f->dst) + fdb_delete(br, f); + fdb_insert(br, NULL, newaddr, vid); + } } void br_fdb_cleanup(unsigned long _data) @@ -231,13 +263,16 @@ void br_fdb_delete_by_port(struct net_bridge *br, /* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) { + hlist_for_each_entry_rcu(fdb, h, + &br->hash[br_mac_hash(addr, vid)], hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -261,7 +296,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr); + fdb = __br_fdb_get(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -325,26 +360,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, } static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; hlist_for_each_entry(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; } static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; @@ -352,7 +391,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct net_bridge_fdb_entry *fdb; @@ -360,6 +400,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; + fdb->vlan_id = vid; fdb->is_local = 0; fdb->is_static = 0; fdb->updated = fdb->used = jiffies; @@ -369,15 +410,15 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -390,7 +431,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; @@ -401,20 +442,20 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, /* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr); + ret = fdb_insert(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; /* some users want to always flood. */ @@ -426,7 +467,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find_rcu(head, addr); + fdb = fdb_find_rcu(head, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -441,8 +482,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr))) { - fdb = fdb_create(head, source, addr); + if (likely(!fdb_find(head, addr, vid))) { + fdb = fdb_create(head, source, addr, vid); if (fdb) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -495,6 +536,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -506,6 +551,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -571,18 +617,18 @@ out: /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags) + __u16 state, __u16 flags, __u16 vid) { struct net_bridge *br = source->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; fdb_notify(br, fdb, RTM_NEWNEIGH); @@ -607,6 +653,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -614,12 +679,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -627,40 +709,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -668,9 +800,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 02015a505d2a..092b20e4ee4c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,6 +31,7 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && p->state == BR_STATE_FORWARDING); } @@ -63,6 +64,10 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { @@ -88,6 +93,10 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + indev = skb->dev; skb->dev = to->dev; skb_forward_csum(skb); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2148d474a04f..ef1b91431c6b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include <linux/if_ether.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/if_vlan.h> #include "br_private.h" @@ -139,6 +140,7 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); list_del_rcu(&p->list); @@ -395,7 +397,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); - if (br_fdb_insert(br, p, dev->dev_addr)) + if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); kobject_uevent(&p->kobj, KOBJ_ADD); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4b34207419b1..480330151898 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -17,6 +17,7 @@ #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> #include <linux/export.h> +#include <linux/rculist.h> #include "br_private.h" /* Hook for brouter */ @@ -34,6 +35,20 @@ static int br_pass_frame_up(struct sk_buff *skb) brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc modue when someone + * may be running packet capture. + */ + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + skb = br_handle_vlan(br, br_get_vlan_info(br), skb); + if (!skb) + return NET_RX_DROP; + indev = skb->dev; skb->dev = brdev; @@ -50,13 +65,17 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) goto drop; + if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -91,7 +110,8 @@ int br_handle_frame_finish(struct sk_buff *skb) skb2 = skb; br->dev->stats.multicast++; - } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + } else if ((dst = __br_fdb_get(br, dest, vid)) && + dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. */ skb = NULL; @@ -119,8 +139,10 @@ drop: static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_vlan_get_tag(skb, &vid); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index acc9f4cc18f7..38991e03646d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -272,9 +272,6 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); if (err < 0) return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6d6f26531de2..7d886b0a8b7b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) return 0; + if (a->vid != b->vid) + return 0; switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; @@ -50,16 +52,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) return 0; } -static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip, + __u16 vid) { - return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); + return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1); } #if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, - const struct in6_addr *ip) + const struct in6_addr *ip, + __u16 vid) { - return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); + return jhash_2words(ipv6_addr_hash(ip), vid, + mdb->secret) & (mdb->max - 1); } #endif @@ -68,10 +73,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, { switch (ip->proto) { case htons(ETH_P_IP): - return __br_ip4_hash(mdb, ip->u.ip4); + return __br_ip4_hash(mdb, ip->u.ip4, ip->vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return __br_ip6_hash(mdb, &ip->u.ip6); + return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid); #endif } return 0; @@ -101,24 +106,27 @@ struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, } static struct net_bridge_mdb_entry *br_mdb_ip4_get( - struct net_bridge_mdb_htable *mdb, __be32 dst) + struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid) { struct br_ip br_dst; br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( - struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst, + __u16 vid) { struct br_ip br_dst; br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } @@ -694,7 +702,8 @@ err: static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -703,6 +712,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -710,7 +720,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -719,6 +730,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -895,10 +907,12 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; + br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -930,7 +944,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group, vid); if (err) break; } @@ -949,10 +963,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; + br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -990,7 +1006,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, + vid); if (!err) break; } @@ -1074,6 +1091,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1108,7 +1126,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1149,6 +1168,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1180,7 +1200,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1286,7 +1307,8 @@ out: static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -1295,6 +1317,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1302,7 +1325,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -1311,6 +1335,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1326,6 +1351,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; + u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1386,6 +1412,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1393,7 +1420,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb2); @@ -1402,7 +1429,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } @@ -1427,6 +1454,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1510,6 +1538,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1522,7 +1551,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, } mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; } case ICMPV6_MLD2_REPORT: @@ -1539,7 +1568,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..27aa3ee517ce 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <uapi/linux/if_bridge.h> #include "br_private.h" #include "br_private_stp.h" @@ -64,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -98,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -107,6 +114,48 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + u16 pvid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + pvid = br_get_pvid(pv); + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -119,10 +168,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -130,7 +183,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -144,24 +197,85 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } +static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +355,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +363,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -292,6 +445,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +static struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -305,11 +481,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -318,5 +501,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 711094aed41a..6d314c4e6bcb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include <linux/netpoll.h> #include <linux/u64_stats_sync.h> #include <net/route.h> +#include <linux/if_vlan.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -26,6 +27,7 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -61,6 +63,20 @@ struct br_ip #endif } u; __be16 proto; + __u16 vid; +}; + +struct net_port_vlans { + u16 port_idx; + u16 pvid; + union { + struct net_bridge_port *port; + struct net_bridge *br; + } parent; + struct rcu_head rcu; + unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -74,6 +90,7 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local; unsigned char is_static; + __u16 vlan_id; }; struct net_bridge_port_group { @@ -156,6 +173,9 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_port_vlans __rcu *vlan_info; +#endif }; #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) @@ -197,9 +217,6 @@ struct net_bridge bool nf_call_ip6tables; bool nf_call_arptables; #endif - unsigned long flags; -#define BR_SET_MAC_ADDR 0x00000001 - u16 group_fwd_mask; /* STP */ @@ -260,6 +277,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + struct net_port_vlans __rcu *vlan_info; +#endif }; struct br_input_skb_cb { @@ -355,18 +376,22 @@ extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, int do_all); extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr); + const unsigned char *addr, + __u16 vid); extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); extern int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); +extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -534,6 +559,142 @@ static inline void br_mdb_uninit(void) } #endif +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +extern bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb); +extern struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +extern int br_vlan_delete(struct net_bridge *br, u16 vid); +extern void br_vlan_flush(struct net_bridge *br); +extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +extern void nbp_vlan_flush(struct net_bridge_port *port); +extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return rcu_dereference_rtnl(br->vlan_info); +} + +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return rcu_dereference_rtnl(p->vlan_info); +} + +/* Since bridge now depends on 8021Q module, but the time bridge sees the + * skb, the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (vlan_tx_tag_present(skb)) + *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if + * vid wasn't set + */ + smp_rmb(); + return (v->pvid & VLAN_TAG_PRESENT) ? + (v->pvid & ~VLAN_TAG_PRESENT) : + VLAN_N_VID; +} + +#else +static inline bool br_allowed_ingress(struct net_bridge *br, + struct net_port_vlans *v, + struct sk_buff *skb, + u16 *vid) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return NULL; +} +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + return false; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + return VLAN_N_VID; /* Returns invalid vid */ +} +#endif + /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); @@ -594,8 +755,9 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 7f884e3fb955..8660ea3be705 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -16,6 +16,7 @@ #include <linux/etherdevice.h> #include <linux/llc.h> #include <linux/slab.h> +#include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> @@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p, skb->dev = p->dev; skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); memcpy(__skb_put(skb, length), data, length); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 7b5197c7de13..0bdb4ebd362b 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -216,7 +216,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) struct net_bridge_port *p; /* user has chosen a value so keep it */ - if (br->flags & BR_SET_MAC_ADDR) + if (br->dev->addr_assign_type == NET_ADDR_SET) return false; list_for_each_entry(p, &br->port_list, list) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 5913a3a0047b..8baa9c08e1a4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -691,6 +691,24 @@ static ssize_t store_nf_call_arptables( static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, show_nf_call_arptables, store_nf_call_arptables); #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t show_vlan_filtering(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->vlan_enabled); +} + +static ssize_t store_vlan_filtering(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, + show_vlan_filtering, store_vlan_filtering); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -732,6 +750,9 @@ static struct attribute *bridge_attrs[] = { &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, +#endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 000000000000..93dde75923f0 --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,415 @@ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "br_private.h" + +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +{ + struct net_bridge_port *p = NULL; + struct net_bridge *br; + struct net_device *dev; + int err; + + if (test_bit(vid, v->vlan_bitmap)) { + __vlan_add_flags(v, vid, flags); + return 0; + } + + if (vid) { + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); + if (err) + return err; + } + + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; + } + + } + + set_bit(vid, v->vlan_bitmap); + v->num_vlans++; + __vlan_add_flags(v, vid, flags); + + return 0; + +out_filt: + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + return err; +} + +static int __vlan_del(struct net_port_vlans *v, u16 vid) +{ + if (!test_bit(vid, v->vlan_bitmap)) + return -EINVAL; + + __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); + + if (v->port_idx && vid) { + struct net_device *dev = v->parent.port->dev; + + if (dev->features & NETIF_F_HW_VLAN_FILTER) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + } + + clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; + if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); + } + return 0; +} + +static void __vlan_flush(struct net_port_vlans *v) +{ + smp_wmb(); + v->pvid = 0; + bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); +} + +/* Strip the tag from the packet. Will return skb with tci set 0. */ +static struct sk_buff *br_vlan_untag(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_8021Q)) { + skb->vlan_tci = 0; + return skb; + } + + skb->vlan_tci = 0; + skb = vlan_untag(skb); + if (skb) + skb->vlan_tci = 0; + + return skb; +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *pv, + struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + goto out; + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id is set in the untagged bitmap, + * send untagged; otherwise, send taged. + */ + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, pv->untagged_bitmap)) + skb = br_vlan_untag(skb); + else { + /* Egress policy says "send tagged". If output device + * is the bridge, we need to add the VLAN header + * ourselves since we'll be going through the RX path. + * Sending to ports puts the frame on the TX path and + * we let dev_hard_start_xmit() add the header. + */ + if (skb->protocol != htons(ETH_P_8021Q) && + pv->port_idx == 0) { + /* vlan_put_tag expects skb->data to point to + * mac header. + */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_tci); + if (!skb) + goto out; + /* put skb->data back to where it was */ + skb_pull(skb, ETH_HLEN); + skb->vlan_tci = 0; + } + } + +out: + return skb; +} + +/* Called under RCU */ +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) + return true; + + /* If there are no vlan in the permitted list, all packets are + * rejected. + */ + if (!v) + return false; + + if (br_vlan_get_tag(skb, vid)) { + u16 pvid = br_get_pvid(v); + + /* Frame did not have a tag. See if pvid is set + * on this port. That tells us which vlan untagged + * traffic belongs to. + */ + if (pvid == VLAN_N_VID) + return false; + + /* PVID is set on this port. Any untagged ingress + * frame is considered to belong to this vlan. + */ + __vlan_hwaccel_put_tag(skb, pvid); + return true; + } + + /* Frame had a valid vlan tag. See if vlan is allowed */ + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + return true; + + if (!v) + return false; + + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Must be protected by RTNL */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + + pv->parent.br = br; + err = __vlan_add(pv, vid, flags); + if (err) + goto out; + + rcu_assign_pointer(br->vlan_info, pv); + return 0; +out: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. + */ + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); + } + + __vlan_del(pv, vid); + return 0; +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + if (br->vlan_enabled == val) + goto unlock; + + br->vlan_enabled = val; + +unlock: + rtnl_unlock(); + return 0; +} + +/* Must be protected by RTNL */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) { + err = -ENOMEM; + goto clean_up; + } + + pv->port_idx = port->port_no; + pv->parent.port = port; + err = __vlan_add(pv, vid, flags); + if (err) + goto clean_up; + + rcu_assign_pointer(port->vlan_info, pv); + return 0; + +clean_up: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. + */ + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); + } + + return __vlan_del(pv, vid); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(port->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3476ec469740..3bf43f7bb9d4 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -70,8 +70,7 @@ static void ulog_send(unsigned int nlgroup) { ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); if (!ub->skb) return; @@ -319,8 +318,7 @@ static void __exit ebt_ulog_fini(void) xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); spin_lock_bh(&ub->lock); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5fe2ff3b01ef..8d493c91a562 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1472,16 +1472,17 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(sock_net(sk), user, len); + ret = update_counters(net, user, len); break; default: ret = -EINVAL; @@ -1494,14 +1495,15 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret; struct ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2279,16 +2281,17 @@ static int compat_do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case EBT_SO_SET_ENTRIES: - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = compat_update_counters(sock_net(sk), user, len); + ret = compat_update_counters(net, user, len); break; default: ret = -EINVAL; @@ -2302,8 +2305,9 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, int ret; struct compat_ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* try real handler in case userland supplied needed padding */ @@ -2314,7 +2318,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/can/bcm.c b/net/can/bcm.c index ccc27b9e8384..28e12d18f0f1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,8 +261,8 @@ static void bcm_can_tx(struct bcm_op *op) if (!skb) goto out; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1207,7 +1207,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!skb) return -ENOMEM; - skb_reserve(skb, sizeof(struct can_skb_priv)); + can_skb_reserve(skb); err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { @@ -1221,7 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/gw.c b/net/can/gw.c index acdd4656cc3b..c185fcd5e828 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -381,9 +381,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) /* is sending the skb back to the incoming interface not allowed? */ if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && - skb_headroom(skb) == sizeof(struct can_skb_priv) && - (((struct can_skb_priv *)(skb->head))->ifindex == - gwj->dst.dev->ifindex)) + can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; /* diff --git a/net/can/raw.c b/net/can/raw.c index 5d860e8dcc52..c1764e41ddaf 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -705,8 +705,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto put_dev; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) diff --git a/net/core/datagram.c b/net/core/datagram.c index 0337e2b76862..368f9c3f9dc6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { - if (*off >= skb->len) { + if (*off >= skb->len && skb->len) { *off -= skb->len; continue; } diff --git a/net/core/dev.c b/net/core/dev.c index a83375d3af72..2f31bf97ba65 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1266,6 +1266,14 @@ static int __dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + /* Block netpoll from trying to do any rx path servicing. + * If we don't do this there is a chance ndo_poll_controller + * or ndo_poll may be running while we open the device + */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); if (ret) @@ -1279,6 +1287,8 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); + netpoll_rx_enable(dev); + if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { @@ -1370,9 +1380,16 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); + /* Temporarily disable netpoll until the interface is down */ + retval = netpoll_rx_disable(dev); + if (retval) + return retval; + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); return retval; } @@ -1408,14 +1425,22 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { + int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); + /* Block netpoll rx while the interface is going down */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); } - return 0; + return ret; } EXPORT_SYMBOL(dev_close); @@ -2302,18 +2327,29 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + /** - * skb_gso_segment - Perform segmentation on skb. + * __skb_gso_segment - Perform segmentation on skb. * @skb: buffer to segment * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path * * This function segments the given skb and returns a list of segments. * * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; @@ -2336,7 +2372,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb_needs_check(skb, tx_path))) { skb_warn_bad_offload(skb); if (skb_header_cloned(skb) && @@ -2365,7 +2401,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, return segs; } -EXPORT_SYMBOL(skb_gso_segment); +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG @@ -2799,6 +2835,8 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + skb_reset_mac_header(skb); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -5958,10 +5996,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) { - pr_err("netdev: Unable to allocate %u rx queues\n", count); + if (!rx) return -ENOMEM; - } + dev->_rx = rx; for (i = 0; i < count; i++) @@ -5992,10 +6029,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev) BUG_ON(count < 1); tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - pr_err("netdev: Unable to allocate %u tx queues\n", count); + if (!tx) return -ENOMEM; - } + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -6054,6 +6090,14 @@ int register_netdevice(struct net_device *dev) } } + if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && + (!dev->netdev_ops->ndo_vlan_rx_add_vid || + !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { + netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); + ret = -EINVAL; + goto err_uninit; + } + ret = -EBUSY; if (!dev->ifindex) dev->ifindex = dev_new_index(net); @@ -6474,10 +6518,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - pr_err("alloc_netdev: Unable to allocate device\n"); + if (!p) return NULL; - } dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bd0eedb357f..3863b8f639c5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device goto out_entries; } - if (tbl->entry_size) - n = kzalloc(tbl->entry_size, GFP_ATOMIC); - else { - int sz = sizeof(*n) + tbl->key_len; - - sz = ALIGN(sz, NEIGH_PRIV_ALIGN); - sz += dev->neigh_priv_len; - n = kzalloc(sz, GFP_ATOMIC); - } + n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; @@ -1546,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); + if (!tbl->entry_size) + tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + + tbl->key_len, NEIGH_PRIV_ALIGN); + else + WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); + rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e2f79a14625c..fa32899006a2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +static struct srcu_struct netpoll_srcu; + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -59,6 +61,7 @@ static atomic_t trapped; static void zap_completion_queue(void); static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -199,18 +202,31 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - if (!dev || !netif_running(dev)) + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (!mutex_trylock(&ni->dev_lock)) return; + if (!netif_running(dev)) { + mutex_unlock(&ni->dev_lock); + return; + } + ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) + if (!ops->ndo_poll_controller) { + mutex_unlock(&ni->dev_lock); return; + } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); poll_napi(dev); + mutex_unlock(&ni->dev_lock); + if (dev->flags & IFF_SLAVE) { if (ni) { struct net_device *bond_dev; @@ -231,6 +247,31 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + mutex_lock(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} +EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + mutex_unlock(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -635,7 +676,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (memcmp(daddr, &np->local_ip, sizeof(*daddr))) + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; hlen = LL_RESERVED_SPACE(np->dev); @@ -666,7 +707,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)skb_transport_header(send_skb) + sizeof(struct icmp6hdr); + target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, @@ -828,9 +869,9 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (udp6_csum_init(skb, uh, IPPROTO_UDP)) goto out; list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (memcmp(&np->local_ip.in6, &ip6h->daddr, sizeof(struct in6_addr)) != 0) + if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) continue; - if (memcmp(&np->remote_ip.in6, &ip6h->saddr, sizeof(struct in6_addr)) != 0) + if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) continue; if (np->local_port && np->local_port != ntohs(uh->dest)) continue; @@ -984,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -1004,6 +1046,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); + mutex_init(&npinfo->dev_lock); skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1017,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto free_npinfo; } } else { - npinfo = ndev->npinfo; + npinfo = rtnl_dereference(ndev->npinfo); atomic_inc(&npinfo->refcnt); } @@ -1049,8 +1092,10 @@ int netpoll_setup(struct netpoll *np) int err; rtnl_lock(); - if (np->dev_name) - ndev = __dev_get_by_name(&init_net, np->dev_name); + if (np->dev_name) { + struct net *net = current->nsproxy->net_ns; + ndev = __dev_get_by_name(net, np->dev_name); + } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -1167,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); + init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); @@ -1194,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np) struct netpoll_info *npinfo; unsigned long flags; - npinfo = np->dev->npinfo; + /* rtnl_dereference would be preferable here but + * rcu_cleanup_netpoll path can put us in here safely without + * holding the rtnl, so plain rcu_dereference it is + */ + npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -1206,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -1213,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5e67defe2cb0..0777d0aa18c3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx) /* allocate & copy */ new = kzalloc(new_sz, GFP_KERNEL); - if (!new) { - pr_warn("Unable to alloc new priomap!\n"); + if (!new) return -ENOMEM; - } if (old) memcpy(new->priomap, old->priomap, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b29dacf900f9..2201e699ad67 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -164,6 +164,7 @@ #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif +#include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <linux/bitops.h> @@ -212,7 +213,6 @@ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" #define PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir; #define MAX_CFLOWS 65536 @@ -397,7 +397,15 @@ struct pktgen_hdr { __be32 tv_usec; }; -static bool pktgen_exiting __read_mostly; + +static int pg_net_id __read_mostly; + +struct pktgen_net { + struct net *net; + struct proc_dir_entry *proc_dir; + struct list_head pktgen_threads; + bool pktgen_exiting; +}; struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ @@ -414,6 +422,7 @@ struct pktgen_thread { wait_queue_head_t queue; struct completion start_done; + struct pktgen_net *net; }; #define REMOVE 1 @@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_reset_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); +static void pktgen_run_all_threads(struct pktgen_net *pn); +static void pktgen_reset_all_threads(struct pktgen_net *pn); +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); @@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly; static int debug __read_mostly; static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, @@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, { int err = 0; char data[128]; + struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); if (!capable(CAP_NET_ADMIN)) { err = -EPERM; @@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, data[count - 1] = 0; /* Make string */ if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); + pktgen_stop_all_threads_ifs(pn); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(pn); else if (!strcmp(data, "reset")) - pktgen_reset_all_threads(); + pktgen_reset_all_threads(pn); else pr_warning("Unknown command: %s\n", data); @@ -1781,10 +1790,13 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + ret = pktgen_add_device(t, f); mutex_unlock(&pktgen_thread_lock); - ret = count; - sprintf(pg_result, "OK: add_device=%s", f); + if (!ret) { + ret = count; + sprintf(pg_result, "OK: add_device=%s", f); + } else + sprintf(pg_result, "ERROR: can not add device %s", f); goto out; } @@ -1824,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = { }; /* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) +static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, + const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; bool exact = (remove == FIND); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { @@ -1848,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) /* * mark a device for removal */ -static void pktgen_mark_device(const char *ifname) +static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) { struct pktgen_dev *pkt_dev = NULL; const int max_tries = 10, msec_per_try = 125; @@ -1859,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname) while (1) { - pkt_dev = __pktgen_NN_threads(ifname, REMOVE); + pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE); if (pkt_dev == NULL) break; /* success */ @@ -1880,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname) mutex_unlock(&pktgen_thread_lock); } -static void pktgen_change_name(struct net_device *dev) +static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev) { struct pktgen_thread *t; - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); pkt_dev->entry = proc_create_data(dev->name, 0600, - pg_proc_dir, + pn->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) @@ -1909,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); - if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) + if (pn->pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -1919,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused, switch (event) { case NETDEV_CHANGENAME: - pktgen_change_name(dev); + pktgen_change_name(pn, dev); break; case NETDEV_UNREGISTER: - pktgen_mark_device(dev->name); + pktgen_mark_device(pn, dev->name); break; } return NOTIFY_DONE; } -static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, +static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { char b[IFNAMSIZ+5]; @@ -1944,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, } b[i] = 0; - return dev_get_by_name(&init_net, b); + return dev_get_by_name(pn->net, b); } /* Associate pktgen_dev with a device. */ -static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) +static int pktgen_setup_dev(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { struct net_device *odev; int err; @@ -1961,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = pktgen_dev_get_by_name(pkt_dev, ifname); + odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname); if (!odev) { pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2203,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; + struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, DUMMY_MARK, + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, @@ -2912,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t) t->control &= ~(T_STOP); } -static void pktgen_stop_all_threads_ifs(void) +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2920,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= T_STOP; mutex_unlock(&pktgen_thread_lock); @@ -2956,28 +2973,28 @@ signal: return 0; } -static int pktgen_wait_all_threads_run(void) +static int pktgen_wait_all_threads_run(struct pktgen_net *pn) { struct pktgen_thread *t; int sig = 1; mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { sig = pktgen_wait_thread_run(t); if (sig == 0) break; } if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); return sig; } -static void pktgen_run_all_threads(void) +static void pktgen_run_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2985,7 +3002,7 @@ static void pktgen_run_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_RUN); mutex_unlock(&pktgen_thread_lock); @@ -2993,10 +3010,10 @@ static void pktgen_run_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } -static void pktgen_reset_all_threads(void) +static void pktgen_reset_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -3004,7 +3021,7 @@ static void pktgen_reset_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_REMDEVALL); mutex_unlock(&pktgen_thread_lock); @@ -3012,7 +3029,7 @@ static void pktgen_reset_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -3154,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - - remove_proc_entry(t->tsk->comm, pg_proc_dir); - + remove_proc_entry(t->tsk->comm, t->net->proc_dir); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3302,7 +3317,7 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { - if (pktgen_exiting) + if (t->net->pktgen_exiting) break; wait_event_interruptible_timeout(t->queue, t->control != 0, @@ -3424,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) /* We don't allow a device to be on several threads */ - pkt_dev = __pktgen_NN_threads(ifname, FIND); + pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND); if (pkt_dev) { pr_err("ERROR: interface already used\n"); return -EBUSY; @@ -3459,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->node = -1; - err = pktgen_setup_dev(pkt_dev, ifname); + err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) goto out1; if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, + pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", @@ -3490,7 +3505,7 @@ out1: return err; } -static int __init pktgen_create_thread(int cpu) +static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) { struct pktgen_thread *t; struct proc_dir_entry *pe; @@ -3508,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); - list_add_tail(&t->th_list, &pktgen_threads); + list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); p = kthread_create_on_node(pktgen_thread_worker, @@ -3524,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu) kthread_bind(p, cpu); t->tsk = p; - pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, + pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, &pktgen_thread_fops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", @@ -3535,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu) return -EINVAL; } + t->net = pn; wake_up_process(p); wait_for_completion(&t->start_done); @@ -3560,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { + struct pktgen_net *pn = t->net; pr_debug("remove_device pkt_dev=%p\n", pkt_dev); @@ -3580,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, _rem_dev_from_if_list(t, pkt_dev); if (pkt_dev->entry) - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3592,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t, return 0; } -static int __init pg_init(void) +static int __net_init pg_net_init(struct net *net) { - int cpu; + struct pktgen_net *pn = net_generic(net, pg_net_id); struct proc_dir_entry *pe; - int ret = 0; - - pr_info("%s", version); - - pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); - if (!pg_proc_dir) + int cpu, ret = 0; + + pn->net = net; + INIT_LIST_HEAD(&pn->pktgen_threads); + pn->pktgen_exiting = false; + pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net); + if (!pn->proc_dir) { + pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; - - pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); + } + pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); if (pe == NULL) { - pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); + pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; - goto remove_dir; + goto remove; } - register_netdevice_notifier(&pktgen_notifier_block); - for_each_online_cpu(cpu) { int err; - err = pktgen_create_thread(cpu); + err = pktgen_create_thread(cpu, pn); if (err) - pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } - if (list_empty(&pktgen_threads)) { - pr_err("ERROR: Initialization failed for all threads\n"); + if (list_empty(&pn->pktgen_threads)) { + pr_err("Initialization failed for all threads\n"); ret = -ENODEV; - goto unregister; + goto remove_entry; } return 0; - unregister: - unregister_netdevice_notifier(&pktgen_notifier_block); - remove_proc_entry(PGCTRL, pg_proc_dir); - remove_dir: - proc_net_remove(&init_net, PG_PROC_DIR); +remove_entry: + remove_proc_entry(PGCTRL, pn->proc_dir); +remove: + proc_net_remove(pn->net, PG_PROC_DIR); return ret; } -static void __exit pg_cleanup(void) +static void __net_exit pg_net_exit(struct net *net) { + struct pktgen_net *pn = net_generic(net, pg_net_id); struct pktgen_thread *t; struct list_head *q, *n; LIST_HEAD(list); /* Stop all interfaces & threads */ - pktgen_exiting = true; + pn->pktgen_exiting = true; mutex_lock(&pktgen_thread_lock); - list_splice_init(&pktgen_threads, &list); + list_splice_init(&pn->pktgen_threads, &list); mutex_unlock(&pktgen_thread_lock); list_for_each_safe(q, n, &list) { @@ -3658,12 +3675,36 @@ static void __exit pg_cleanup(void) kfree(t); } - /* Un-register us from receiving netdevice events */ - unregister_netdevice_notifier(&pktgen_notifier_block); + remove_proc_entry(PGCTRL, pn->proc_dir); + proc_net_remove(pn->net, PG_PROC_DIR); +} + +static struct pernet_operations pg_net_ops = { + .init = pg_net_init, + .exit = pg_net_exit, + .id = &pg_net_id, + .size = sizeof(struct pktgen_net), +}; + +static int __init pg_init(void) +{ + int ret = 0; - /* Clean up proc file system */ - remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(&init_net, PG_PROC_DIR); + pr_info("%s", version); + ret = register_pernet_subsys(&pg_net_ops); + if (ret) + return ret; + ret = register_netdevice_notifier(&pktgen_notifier_block); + if (ret) + unregister_pernet_subsys(&pg_net_ops); + + return ret; +} + +static void __exit pg_cleanup(void) +{ + unregister_netdevice_notifier(&pktgen_notifier_block); + unregister_pernet_subsys(&pg_net_ops); } module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9a419b099482..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2058,9 +2058,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 *addr; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2122,7 +2119,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; @@ -2130,8 +2127,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2145,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2161,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2171,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2321,6 +2323,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2330,14 +2339,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2378,14 +2388,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } @@ -2470,6 +2480,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2653,6 +2734,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/scm.c b/net/core/scm.c index 57fb1ee6649f..905dcc6ad1e3 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -35,6 +35,7 @@ #include <net/sock.h> #include <net/compat.h> #include <net/scm.h> +#include <net/cls_cgroup.h> /* @@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); - if (sock) + if (sock) { sock_update_netprioidx(sock->sk, current); + sock_update_classid(sock->sk, current); + } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bddc1dd2e7f2..6c1ad09f8796 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { .get = sock_pipe_buf_get, }; -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - /** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. + * skb_panic - private function for out-of-line support + * @skb: buffer + * @sz: size + * @addr: address + * @msg: skb_over_panic or skb_under_panic + * + * Out-of-line support for skb_put() and skb_push(). + * Called via the wrapper skb_over_panic() or skb_under_panic(). + * Keep out of line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always reliable. */ -static void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, + const char msg[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, + msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -static void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); - BUG(); + skb_panic(skb, sz, addr, __func__); } +static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) +{ + skb_panic(skb, sz, addr, __func__); +} /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells @@ -351,10 +341,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; @@ -686,7 +672,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; - new->inner_network_header = old->inner_transport_header; + new->inner_network_header = old->inner_network_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -2340,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; - + skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2847,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; diff --git a/net/core/sock.c b/net/core/sock.c index 235fb89e8973..f1e14e20d181 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2230,7 +2230,7 @@ EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d1b08045a9df..cfdb46ab3a7f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,6 +20,8 @@ #include <net/sock.h> #include <net/net_ratelimit.h> +static int one = 1; + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "dev_weight", diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 3aede1b459fd..856636addd76 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -95,7 +95,7 @@ static u32 dn_neigh_hash(const void *pkey, struct neigh_table dn_neigh_table = { .family = PF_DECnet, - .entry_size = sizeof(struct dn_neigh), + .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), .hash = dn_neigh_hash, .constructor = dn_neigh_construct, diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 09cba81d2c4a..43b95ca61114 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -377,17 +377,14 @@ static int lowpan_header_create(struct sk_buff *skb, struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 *head; + u8 head[100]; struct ieee802154_addr sa, da; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ if (type != ETH_P_IPV6) return 0; - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - head = kzalloc(100, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; hdr = ipv6_hdr(skb); hc06_ptr = head + 2; @@ -561,8 +558,6 @@ static int lowpan_header_create(struct sk_buff *skb, skb_pull(skb, sizeof(struct ipv6hdr)); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - kfree(head); - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -1267,7 +1262,7 @@ static inline int __init lowpan_netlink_init(void) return rtnl_link_register(&lowpan_link_ops); } -static inline void __init lowpan_netlink_fini(void) +static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 49ddca31c4da..e6e5d8506336 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,21 +263,6 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, __u8 protocol) -{ - const struct net_protocol *ipprot; - - if (net_eq(net, &init_net)) - return 1; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot == NULL) { - /* raw IP is OK */ - return 1; - } - return ipprot->netns_ok; -} - /* * Create an inet socket. */ @@ -350,10 +335,6 @@ lookup_protocol: !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; - err = -EAFNOSUPPORT; - if (!inet_netns_ok(net, protocol)) - goto out_rcu_unlock; - sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -1306,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | - SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a0d8392491c3..a69b4e4a02b5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -381,7 +385,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr; @@ -413,9 +420,12 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9547a273b9e9..ded146b217f1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -928,24 +928,25 @@ static void parp_redo(struct sk_buff *skb) static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct arphdr *arp; + const struct arphdr *arp; + + if (dev->flags & IFF_NOARP || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK) + goto freeskb; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out_of_mem; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); - if (arp->ar_hln != dev->addr_len || - dev->flags & IFF_NOARP || - skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - arp->ar_pln != 4) + if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) goto freeskb; - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb == NULL) - goto out_of_mem; - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 424fafbc8cb0..b28e863fe0a7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -85,3 +85,28 @@ out: return err; } EXPORT_SYMBOL(ip4_datagram_connect); + +void ip4_datagram_release_cb(struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 fl4; + struct rtable *rt; + + if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) + return; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (!IS_ERR(rt)) + __sk_dst_set(sk, &rt->dst); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a8e4f2665d5e..5281314886c1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -573,7 +582,107 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *node; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + rcu_read_lock(); + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + hlist_for_each_entry_rcu(ifa, node, + &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap ; + + rtnl_lock(); + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &ifa->ifa_next) { + if (*ifap == ifa) + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + } + rtnl_unlock(); + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + } + rcu_read_unlock(); + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + + set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + } + return 0; } /* @@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1988,6 +2191,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b61e9deb7c7e..3b4f0cd2e63e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr[1]; @@ -499,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4750d2b74d79..2e453bde6992 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -73,8 +73,9 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { nf->nqueues = 0; - atomic_set(&nf->mem, 0); + init_frag_mem_limit(nf); INIT_LIST_HEAD(&nf->lru_list); + spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); @@ -91,6 +92,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) local_bh_disable(); inet_frag_evictor(nf, f, true); local_bh_enable(); + + percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -98,9 +101,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - list_del(&fq->lru_list); fq->net->nqueues--; write_unlock(&f->lock); + inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -117,12 +120,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb, int *work) + struct sk_buff *skb) { - if (work) - *work -= skb->truesize; - - atomic_sub(skb->truesize, &nf->mem); if (f->skb_free) f->skb_free(skb); kfree_skb(skb); @@ -133,6 +132,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, { struct sk_buff *fp; struct netns_frags *nf; + unsigned int sum, sum_truesize = 0; WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -143,13 +143,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(nf, f, fp, work); + sum_truesize += fp->truesize; + frag_kfree_skb(nf, f, fp); fp = xp; } - + sum = sum_truesize + f->qsize; if (work) - *work -= f->qsize; - atomic_sub(f->qsize, &nf->mem); + *work -= sum; + sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); @@ -164,22 +165,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) int work, evicted = 0; if (!force) { - if (atomic_read(&nf->mem) <= nf->high_thresh) + if (frag_mem_limit(nf) <= nf->high_thresh) return 0; } - work = atomic_read(&nf->mem) - nf->low_thresh; + work = frag_mem_limit(nf) - nf->low_thresh; while (work > 0) { - read_lock(&f->lock); + spin_lock(&nf->lru_lock); + if (list_empty(&nf->lru_list)) { - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); break; } q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); if (!(q->last_in & INET_FRAG_COMPLETE)) @@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - list_add_tail(&qp->lru_list, &nf->lru_list); nf->nqueues++; write_unlock(&f->lock); + inet_frag_lru_add(nf, qp); return qp; } @@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - atomic_add(f->qsize, &nf->mem); + add_frag_mem_limit(q, f->qsize); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f55a4e61bfb8..1211613c6c34 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net) int ip_frag_mem(struct net *net) { - return atomic_read(&net->ipv4.frags.mem); + return sum_frag_mem_limit(&net->ipv4.frags); } static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, @@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) qp->user == arg->user; } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); @@ -340,6 +333,7 @@ static inline int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; + unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { atomic_inc(&qp->q.refcnt); @@ -349,9 +343,12 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp); + + sum_truesize += fp->truesize; + kfree_skb(fp); fp = xp; } while (fp); + sub_frag_mem_limit(&qp->q, sum_truesize); qp->q.last_in = 0; qp->q.len = 0; @@ -496,7 +493,8 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it); + sub_frag_mem_limit(&qp->q, free_it->truesize); + kfree_skb(free_it); } } @@ -519,7 +517,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - atomic_add(skb->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; @@ -531,9 +529,7 @@ found: qp->q.meat == qp->q.len) return ip_frag_reasm(qp, prev, dev); - write_lock(&ip4_frags.lock); - list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); - write_unlock(&ip4_frags.lock); + inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: @@ -617,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -645,7 +641,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &qp->q.net->mem); + sub_frag_mem_limit(&qp->q, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af6be70821c4..00a14b9864ea 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -820,8 +820,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ttl = tiph->ttl; tos = tiph->tos; - if (tos == 1) { - tos = 0; + if (tos & 0x1) { + tos &= ~0x1; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) @@ -965,8 +965,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ptr--; } if (tunnel->parms.o_flags&GRE_CSUM) { + int offset = skb_transport_offset(skb); + *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, + skb->len - offset, + 0)); } } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f1395a6fb35f..87abd3e2bd32 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb) if (ipprot != NULL) { int ret; - if (!net_eq(net, &init_net) && !ipprot->netns_ok) { - net_info_ratelimited("%s: proto %d isn't netns-ready\n", - __func__, protocol); - kfree_skb(skb); - goto out; - } - if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2bff55..5e12dca7b3dd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -598,6 +598,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index d3ab47e19a89..f01d1b1aff7f 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } @@ -160,6 +163,7 @@ static const struct net_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, .err_handler = ipcomp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ipcomp4_init(void) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5ef3cba2250..7d168dcbd135 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -88,10 +88,8 @@ static void ulog_send(unsigned int nlgroupnum) { ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; - if (timer_pending(&ub->timer)) { - pr_debug("ulog_send: timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("ulog_send: timer is deleting\n"); + del_timer(&ub->timer); if (!ub->skb) { pr_debug("ulog_send: nothing to send\n"); @@ -426,10 +424,8 @@ static void __exit ulog_tg_exit(void) /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) { - pr_debug("timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("timer is deleting\n"); + del_timer(&ub->timer); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8f3d05424a3e..6f9c07268cf6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -738,6 +738,7 @@ struct proto ping_prot = { .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = ping_v4_hash, .unhash = ping_v4_unhash, .get_port = ping_v4_get_port, diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0f9d09f54bd9..ce848461acbb 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -37,6 +37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { + if (!prot->netns_ok) { + pr_err("Protocol %u is not namespace aware, cannot register.\n", + protocol); + return -EINVAL; + } + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 73d1e4df4bf6..6f08991409c3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -894,6 +894,7 @@ struct proto raw_prot = { .recvmsg = raw_recvmsg, .bind = raw_bind, .backlog_rcv = raw_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 844a9ef60dbd..a0fcc47fee73 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + if (dst->dev->mtu < mtu) return; @@ -962,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; @@ -975,6 +978,53 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) ip_rt_put(rt); } } + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; + struct dst_entry *dst; + bool new = false; + + bh_lock_sock(sk); + rt = (struct rtable *) __sk_dst_get(sk); + + if (sock_owned_by_user(sk) || !rt) { + __ipv4_sk_update_pmtu(skb, sk, mtu); + goto out; + } + + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!__sk_dst_check(sk, 0)) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + new = true; + } + + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + + dst = dst_check(&rt->dst, 0); + if (!dst) { + if (new) + dst_release(&rt->dst); + + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + new = true; + } + + if (new) + __sk_dst_set(sk, &rt->dst); + +out: + bh_unlock_sock(sk); +} EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, @@ -1120,7 +1170,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu && rt_is_output_route(rt)) + if (mtu) return mtu; mtu = dst->dev->mtu; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a25e1d286b99..960fd29d9b8e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -27,6 +27,7 @@ #include <net/tcp_memcontrol.h> static int zero; +static int one = 1; static int two = 2; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -549,14 +550,16 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -630,13 +633,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -779,7 +775,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -787,7 +783,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3ec1f69c5ceb..1f0bedb8622f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) tcp_enable_early_retrans(tp); icsk->icsk_ca_ops = &tcp_init_congestion_ops; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; sk->sk_write_space = sk_stream_write_space; @@ -895,8 +897,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } - - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -2289,7 +2290,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); @@ -2713,6 +2713,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2961,6 +2967,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } @@ -3034,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 291f2ed7cc31..019c2389a341 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -310,35 +310,24 @@ void tcp_slow_start(struct tcp_sock *tp) { int cnt; /* increase in packets */ unsigned int delta = 0; + u32 snd_cwnd = tp->snd_cwnd; - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; + if (unlikely(!snd_cwnd)) { + pr_err_once("snd_cwnd is nul, please report this bug.\n"); + snd_cwnd = 1U; + } if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else - cnt = tp->snd_cwnd; /* exponential increase */ - - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; - tp->bytes_acked = 0; + cnt = snd_cwnd; /* exponential increase */ tp->snd_cwnd_cnt += cnt; - while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd_cnt -= tp->snd_cwnd; + while (tp->snd_cwnd_cnt >= snd_cwnd) { + tp->snd_cwnd_cnt -= snd_cwnd; delta++; } - tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp); + tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_slow_start); @@ -372,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. */ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 492c7cfe1453..a759e19496d2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ @@ -1240,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type |= sk->sk_gso_type; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->gso_type = 0; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ @@ -2007,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; tp->frto_counter = 0; - tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -2056,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->bytes_acked = 0; tcp_clear_retrans_partial(tp); if (tcp_is_reno(tp)) @@ -2684,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; - tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2735,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; - tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tcp_init_cwnd_reduction(sk, set_ssthresh); @@ -3417,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -3502,6 +3496,11 @@ static bool tcp_process_frto(struct sock *sk, int flag) } } else { if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { + if (!tcp_packets_in_flight(tp)) { + tcp_enter_frto_loss(sk, 2, flag); + return true; + } + /* Prevent sending of new data. */ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)); @@ -3608,15 +3607,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); @@ -3870,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; return true; } return false; @@ -3894,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; + return true; } @@ -5647,8 +5641,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, * the remote receives only the retransmitted (regular) SYNs: either * the original SYN-data or the corresponding SYN-ACK is lost. */ - syn_drop = (cookie->len <= 0 && data && - inet_csk(sk)->icsk_retransmits); + syn_drop = (cookie->len <= 0 && data && tp->total_retrans); tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); @@ -5676,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bbbdcc5c1973..77f5050efc8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * We do take care of PMTU discovery (RFC1191) special case : * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk) && - type != ICMP_DEST_UNREACH && - code != ICMP_FRAG_NEEDED) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); - + if (sock_owned_by_user(sk)) { + if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + } if (sk->sk_state == TCP_CLOSE) goto out; @@ -497,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -726,7 +726,7 @@ release_sk1: */ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts, int oif, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) { @@ -747,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - if (ts) { + if (tsecr) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); + rep.opt[1] = htonl(tsval); + rep.opt[2] = htonl(tsecr); arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } @@ -767,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, #ifdef CONFIG_TCP_MD5SIG if (key) { - int offset = (ts) ? 3 : 0; + int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -802,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -821,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -1502,8 +1504,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) @@ -1668,6 +1672,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; } EXPORT_SYMBOL(tcp_v4_conn_request); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f35f2dfb6401..b83a49cc3816 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { + tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); @@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tcptw->tw_ts_offset = tp->tsoffset; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -446,7 +448,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; @@ -500,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + newtp->tsoffset = 0; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 367e2ec01da1..6182d90e97b0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when : 0; + opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type |= sk->sk_gso_type; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e0610e4b5158..6791aac06ea9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1977,6 +1977,7 @@ struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80d59802d964..86c235d05aba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -248,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -432,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -615,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { @@ -1660,6 +1667,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) if (dev->addr_len != IEEE802154_ADDR_LEN) return -1; memcpy(eui, dev->dev_addr, 8); + eui[0] ^= 2; return 0; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ecc35b93314b..384233188ac1 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -593,9 +596,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); - skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); + err = nexthdr; out_free: diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 757a810d8f15..921b8b398a8c 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,7 +47,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); /* Big ac list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_ac_lock); +static DEFINE_SPINLOCK(ipv6_sk_ac_lock); /* @@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_ac_inc(dev, addr); if (!err) { - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } @@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) @@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); @@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 33be36398a78..f5a54782a340 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -380,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -468,7 +468,8 @@ out: } -int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_skb_parm *opt = IP6CB(skb); @@ -598,11 +599,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); -int datagram_send_ctl(struct net *net, struct sock *sk, - struct msghdr *msg, struct flowi6 *fl6, - struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) +int ip6_datagram_send_ctl(struct net *net, struct sock *sk, + struct msghdr *msg, struct flowi6 *fl6, + struct ipv6_txoptions *opt, + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -872,4 +874,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk, exit_f: return err; } -EXPORT_SYMBOL_GPL(datagram_send_ctl); +EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 282f3723ee19..40ffd72243a4 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -hdr_len); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -hdr_len); err = nexthdr[1]; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4a9fd51dae7..fff5bdd8b680 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } +static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct net *net = dev_net(skb->dev); + + if (type == ICMPV6_PKT_TOOBIG) + ip6_update_pmtu(skb, net, info, 0, 0); + else if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); +} + static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, + .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4297a393678..b386a2ce4c6f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -34,7 +34,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, const struct hlist_node *node; int reuse = sk->sk_reuse; int reuseport = sk->sk_reuseport; - int uid = sock_i_uid((struct sock *)sk); + kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 29124b7a04c8..a7da2f472479 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -51,25 +51,38 @@ #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); -static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; +static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ -static DEFINE_RWLOCK(ip6_fl_lock); +static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); +#define for_each_fl_rcu(hash, fl) \ + for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) +#define for_each_fl_continue_rcu(fl) \ + for (fl = rcu_dereference_bh(fl->next); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) + +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } @@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); fl = __fl_lookup(net, label); - if (fl) - atomic_inc(&fl->users); - read_unlock_bh(&ip6_fl_lock); + if (fl && !atomic_inc_not_zero(&fl->users)) + fl = NULL; + rcu_read_unlock_bh(); return fl; } @@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl) put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); + kfree_rcu(fl, rcu); } - kfree(fl); } static void fl_release(struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { @@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(unsigned long dummy) @@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl=*flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) @@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy) if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static void __net_exit ip6_fl_purge(struct net *net) { int i; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl = *flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; @@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, @@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; @@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; - fl_ht[FL_HASH(fl->label)] = fl; + rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return NULL; } @@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - while ((sfl = np->ipv6_fl_list) != NULL) { + if (!rcu_access_pointer(np->ipv6_fl_list)) + return; + + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -365,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, - &junk, &junk); + err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, + &junk, &junk, &junk); if (err) goto done; err = -EINVAL; @@ -424,7 +449,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -493,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -541,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -553,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); @@ -641,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - fl = fl_ht[state->bucket]; - - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - if (fl) - break; + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } } + fl = NULL; +out: return fl; } @@ -656,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); - fl = fl->next; + for_each_fl_continue_rcu(fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + try_again: - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - - while (!fl) { - if (++state->bucket <= FL_HASH_MASK) { - fl = fl_ht[state->bucket]; - goto try_again; - } else - break; + if (++state->bucket <= FL_HASH_MASK) { + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + goto try_again; } + fl = NULL; + +out: return fl; } @@ -681,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ip6_fl_lock) + __acquires(RCU) { - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -700,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) - __releases(ip6_fl_lock) + __releases(RCU) { - read_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index db91fe3466a3..e4efffe2522e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -958,7 +958,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, int ret; if (!ip6_tnl_xmit_ctl(t)) - return -1; + goto tx_err; switch (skb->protocol) { case htons(ETH_P_IP): diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4ac5bf30e16a..5b10414e619e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 2.7 + * Nodes must not originate a packet to a multicast address whose scope + * field contains the reserved value 0; if such a packet is received, it + * must be silently dropped. + */ + if (ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) + goto err; + /* * RFC4291 2.7 * Multicast addresses must not be used as source addresses in IPv6 diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d141fc32a2ea..f26f0da7f095 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,7 +100,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7dea45af66d3..155eccfa7760 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -120,6 +120,13 @@ static int ip6_finish_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } } rcu_read_lock_bh(); @@ -242,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -1190,10 +1196,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; + exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; - dst_exthdrlen = rt->dst.header_len; + dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index acc32494006a..351ce98e90d9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1766,6 +1766,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -EINVAL; if (get_user(v, (u32 __user *)optval)) return -EFAULT; + /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (v != RT_TABLE_DEFAULT && v >= 100000000) + return -EINVAL; if (sk == mrt->mroute6_sk) return -EBUSY; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ee94d31c9d4d..d1e2e8ef29c5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -476,8 +476,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, - &junk); + retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, + &junk, &junk); if (retv) goto done; update: @@ -1002,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); if (skb) { - int err = datagram_recv_ctl(sk, &msg, skb); + int err = ip6_datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f25002aaf624..3a2849f660ca 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, - sizeof(struct in6_addr)); + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } @@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); - if (rv == 0) - break; + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); + if (rv == 0) /* There is an error in the address. */ + goto done; } - if (rv == 0) /* address already there is an error */ - goto done; for (j=psl->sl_count-1; j>=i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; @@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; @@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 7302b0b7b642..83acc1405a18 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> +#include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> @@ -18,11 +19,20 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; __wsum src_sum = 0, dst_sum = 0; + struct in6_addr pfx; unsigned int i; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; + /* Ensure that LSB of prefix is zero */ + ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); + if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) + return -EINVAL; + ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); + if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { src_sum = csum_add(src_sum, (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); @@ -30,7 +40,7 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); } - npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum); + npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } @@ -51,7 +61,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, idx = i / 32; addr->s6_addr32[idx] &= mask; - addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) @@ -66,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, return false; } - sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx], - npt->adjustment); + sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), + csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7431121b87de..6134a1ebfb1b 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> +#include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3dacecc99065..c674f158efa8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -319,7 +319,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -328,9 +328,8 @@ found: fq->nhoffset = nhoff; fq->q.last_in |= INET_FRAG_FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&nf_frags.lock); + + inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -398,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -422,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, head->truesize); head->local_df = 1; head->next = NULL; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6cd29b1e8b92..70fa81449997 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -507,7 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); err = copied; if (flags & MSG_TRUNC) @@ -822,8 +822,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e5253ec9e0fc..bab2c270f292 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -327,7 +327,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -341,9 +341,7 @@ found: fq->q.meat == fq->q.len) return ip6_frag_reasm(fq, prev, dev); - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + inet_frag_lru_move(&fq->q); return -1; discard_fq: @@ -429,7 +427,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -467,7 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98fe53694a65..02f96dcbcf02 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -590,6 +592,15 @@ out: return err; } +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) +{ + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -608,10 +619,19 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; + } + } else { + if (is_spoofed_6rd(tunnel, iph->saddr, + &ipv6_hdr(skb)->saddr) || + is_spoofed_6rd(tunnel, iph->daddr, + &ipv6_hdr(skb)->daddr)) { + tunnel->dev->stats.rx_errors++; + goto out; + } } __skb_tunnel_rx(skb, tunnel->dev); @@ -645,14 +665,12 @@ out: } /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -671,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -739,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 06087e58738a..9b6460055df5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -423,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; case TCP_SYN_SENT: @@ -712,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -724,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -754,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -859,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -870,10 +872,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -883,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -892,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -959,8 +964,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; + } req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) @@ -1109,6 +1116,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index cb5bf497c09c..599e1ba6d1ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -467,7 +467,7 @@ try_again: ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_ctl(sk, msg, skb); } err = copied; @@ -1143,8 +1143,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index babd1674388a..6cc48012b730 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -94,7 +94,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const hlist_for_each_entry_rcu(x6spi, pos, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) + if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } @@ -211,7 +211,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { + if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index b833677d83d6..d07e3a626446 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2567,8 +2567,7 @@ bed: err); /* If watchdog is still activated, kill it! */ - if(timer_pending(&(self->watchdog))) - del_timer(&(self->watchdog)); + del_timer(&(self->watchdog)); IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__); diff --git a/net/key/af_key.c b/net/key/af_key.c index cb75f9b3105e..7b3ba32ca678 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -761,7 +761,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* identity & sensitivity */ - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -911,8 +911,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, if (!addr->sadb_address_prefixlen) BUG(); - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, - x->props.family)) { + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, + x->props.family)) { addr = (struct sadb_address*) skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); addr->sadb_address_len = @@ -1323,7 +1323,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); - if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1a9f3723c13c..dcfd64e83ab7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -101,6 +101,7 @@ struct l2tp_skb_cb { static atomic_t l2tp_tunnel_count; static atomic_t l2tp_session_count; +static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; @@ -122,7 +123,6 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net) return net_generic(net, l2tp_net_id); } - /* Tunnel reference counts. Incremented per session that is added to * the tunnel. */ @@ -168,6 +168,51 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) } +/* Lookup the tunnel socket, possibly involving the fs code if the socket is + * owned by userspace. A struct sock returned from this function must be + * released using l2tp_tunnel_sock_put once you're done with it. + */ +struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +{ + int err = 0; + struct socket *sock = NULL; + struct sock *sk = NULL; + + if (!tunnel) + goto out; + + if (tunnel->fd >= 0) { + /* Socket is owned by userspace, who might be in the process + * of closing it. Look the socket up using the fd to ensure + * consistency. + */ + sock = sockfd_lookup(tunnel->fd, &err); + if (sock) + sk = sock->sk; + } else { + /* Socket is owned by kernelspace */ + sk = tunnel->sock; + } + +out: + return sk; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); + +/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ +void l2tp_tunnel_sock_put(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + if (tunnel->fd >= 0) { + /* Socket is owned by userspace */ + sockfd_put(sk->sk_socket); + } + sock_put(sk); + } +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); + /* Lookup a session by id in the global session list */ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) @@ -1123,8 +1168,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len struct udphdr *uh; struct inet_sock *inet; __wsum csum; - int old_headroom; - int new_headroom; int headroom; int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; int udp_len; @@ -1136,16 +1179,12 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len */ headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + hdr_len; - old_headroom = skb_headroom(skb); if (skb_cow_head(skb, headroom)) { kfree_skb(skb); return NET_XMIT_DROP; } - new_headroom = skb_headroom(skb); skb_orphan(skb); - skb->truesize += new_headroom - old_headroom; - /* Setup L2TP header */ session->build_header(session, __skb_push(skb, hdr_len)); @@ -1232,6 +1271,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; tunnel = sk->sk_user_data; if (tunnel == NULL) @@ -1239,9 +1279,8 @@ static void l2tp_tunnel_destruct(struct sock *sk) l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Close all sessions */ - l2tp_tunnel_closeall(tunnel); + /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ @@ -1253,17 +1292,23 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ - tunnel->sock = NULL; sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + tunnel->sock = NULL; - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + atomic_dec(&l2tp_tunnel_count); - /* We're finished with the socket */ + l2tp_tunnel_closeall(tunnel); l2tp_tunnel_dec_refcount(tunnel); + /* Call the original destructor */ + if (sk->sk_destruct) + (*sk->sk_destruct)(sk); end: return; } @@ -1337,48 +1382,77 @@ again: */ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - BUG_ON(atomic_read(&tunnel->ref_count) != 0); BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - - /* Remove from tunnel list */ - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); kfree_rcu(tunnel, rcu); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +} - atomic_dec(&l2tp_tunnel_count); +/* Workqueue tunnel deletion function */ +static void l2tp_tunnel_del_work(struct work_struct *work) +{ + struct l2tp_tunnel *tunnel = NULL; + struct socket *sock = NULL; + struct sock *sk = NULL; + + tunnel = container_of(work, struct l2tp_tunnel, del_work); + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) + return; + + sock = sk->sk_socket; + BUG_ON(!sock); + + /* If the tunnel socket was created directly by the kernel, use the + * sk_* API to release the socket now. Otherwise go through the + * inet_* layer to shut the socket down, and let userspace close it. + * In either case the tunnel resources are freed in the socket + * destructor when the tunnel socket goes away. + */ + if (sock->file == NULL) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); + } else { + inet_shutdown(sock, 2); + } + + l2tp_tunnel_sock_put(sk); } /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. + * + * Since we don't want these sockets to keep a namespace alive by + * themselves, we drop the socket's namespace refcount after creation. + * These sockets are freed when the namespace exits using the pernet + * exit hook. */ -static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) +static int l2tp_tunnel_sock_create(struct net *net, + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; - struct sockaddr_in udp_addr; + struct socket *sock = NULL; + struct sockaddr_in udp_addr = {0}; + struct sockaddr_l2tpip ip_addr = {0}; #if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr; - struct sockaddr_l2tpip6 ip6_addr; + struct sockaddr_in6 udp6_addr = {0}; + struct sockaddr_l2tpip6 ip6_addr = {0}; #endif - struct sockaddr_l2tpip ip_addr; - struct socket *sock = NULL; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -1400,13 +1474,12 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp_addr, 0, sizeof(udp_addr)); udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = htons(cfg->local_udp_port); @@ -1433,14 +1506,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip6_addr, 0, sizeof(ip6_addr)); ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1462,14 +1534,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip_addr, 0, sizeof(ip_addr)); ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1493,8 +1564,10 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } out: + *sockp = sock; if ((err < 0) && sock) { - sock_release(sock); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); *sockp = NULL; } @@ -1517,15 +1590,23 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 * kernel socket. */ if (fd < 0) { - err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock); + err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, + cfg, &sock); if (err < 0) goto err; } else { - err = -EBADF; sock = sockfd_lookup(fd, &err); if (!sock) { - pr_err("tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", tunnel_id, fd, err); + err = -EBADF; + goto err; + } + + /* Reject namespace mismatches */ + if (!net_eq(sock_net(sock->sk), net)) { + pr_err("tunl %u: netns mismatch\n", tunnel_id); + err = -EINVAL; goto err; } } @@ -1607,10 +1688,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + tunnel->fd = fd; lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; + /* Init delete workqueue struct */ + INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); + /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); atomic_inc(&l2tp_tunnel_count); @@ -1642,25 +1727,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - int err = 0; - struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL; - - /* Force the tunnel socket to close. This will eventually - * cause the tunnel to be deleted via the normal socket close - * mechanisms when userspace closes the tunnel socket. - */ - if (sock != NULL) { - err = inet_shutdown(sock, 2); - - /* If the tunnel's socket was created by the kernel, - * close the socket here since the socket was not - * created by userspace. - */ - if (sock->file == NULL) - err = inet_release(sock); - } - - return err; + return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1844,8 +1911,21 @@ static __net_init int l2tp_init_net(struct net *net) return 0; } +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + (void)l2tp_tunnel_delete(tunnel); + } + rcu_read_unlock_bh(); +} + static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, + .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; @@ -1858,6 +1938,13 @@ static int __init l2tp_init(void) if (rc) goto out; + l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + if (!l2tp_wq) { + pr_err("alloc_workqueue failed\n"); + rc = -ENOMEM; + goto out; + } + pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION); out: @@ -1867,6 +1954,10 @@ out: static void __exit l2tp_exit(void) { unregister_pernet_device(&l2tp_net_ops); + if (l2tp_wq) { + destroy_workqueue(l2tp_wq); + l2tp_wq = NULL; + } } module_init(l2tp_init); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 56d583e083a7..8eb8f1d47f3a 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -188,7 +188,10 @@ struct l2tp_tunnel { int (*recv_payload_hook)(struct sk_buff *skb); void (*old_sk_destruct)(struct sock *); struct sock *sock; /* Parent socket */ - int fd; + int fd; /* Parent fd, if tunnel socket + * was created by userspace */ + + struct work_struct del_work; uint8_t priv[0]; /* private data */ }; @@ -228,6 +231,8 @@ out: return tunnel; } +extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); +extern void l2tp_tunnel_sock_put(struct sock *sk); extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 61d8b75d2686..f7ac8f42fee2 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -115,6 +115,7 @@ static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, in */ static int l2tp_ip_recv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; u32 session_id; u32 tunnel_id; @@ -142,7 +143,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(&init_net, NULL, session_id); + session = l2tp_session_find(net, NULL, session_id); if (session == NULL) goto discard; @@ -173,14 +174,14 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel != NULL) sk = tunnel->sock; else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); read_unlock_bh(&l2tp_ip_lock); } @@ -239,6 +240,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + struct net *net = sock_net(sk); int ret; int chk_addr_ret; @@ -251,7 +253,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + sk->sk_bound_dev_if, addr->l2tp_conn_id)) goto out_in_use; read_unlock_bh(&l2tp_ip_lock); @@ -260,7 +263,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr); + chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -369,7 +372,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return -1; } @@ -605,6 +608,7 @@ static struct inet_protosw l2tp_ip_protosw = { static struct net_protocol l2tp_ip_protocol __read_mostly = { .handler = l2tp_ip_recv, + .netns_ok = 1, }; static int __init l2tp_ip_init(void) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 927547171bc7..8ee4a86ae996 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -554,8 +554,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -646,7 +646,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { - struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name; size_t copied = 0; int err = -EOPNOTSUPP; @@ -688,8 +688,8 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_scope_id = IP6CB(skb)->iif; } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + if (np->rxopt.all) + ip6_datagram_recv_ctl(sk, msg, skb); if (flags & MSG_TRUNC) copied = skb->len; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bbba3a19e944..c1bab22db85e 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -37,6 +37,7 @@ static struct genl_family l2tp_nl_family = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .netnsok = true, }; /* Accessed under genl lock */ diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 286366ef8930..716605c241f4 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -388,8 +388,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) struct l2tp_session *session; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; - int old_headroom; - int new_headroom; int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -408,7 +406,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (tunnel == NULL) goto abort_put_sess; - old_headroom = skb_headroom(skb); uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; headroom = NET_SKB_PAD + sizeof(struct iphdr) + /* IP header */ @@ -418,9 +415,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; - new_headroom = skb_headroom(skb); - skb->truesize += new_headroom - old_headroom; - /* Setup PPP header */ __skb_push(skb, sizeof(ppph)); skb->data[0] = ppph[0]; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 808338a1bce5..31bf2586fb84 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -83,8 +83,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) sdata_info(sta->sdata, - "HW problem - can not stop rx aggregation for tid %d\n", - tid); + "HW problem - can not stop rx aggregation for %pM tid %d\n", + sta->sta.addr, tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -159,7 +159,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } rcu_read_unlock(); - ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -247,7 +248,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, + "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", + sta->sta.addr, tid); goto end_no_lock; } @@ -317,7 +320,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", + sta->sta.addr, tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index eb9df22418f0..13b7683de5a4 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -149,16 +149,133 @@ void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } +static inline int ieee80211_ac_from_tid(int tid) +{ + return ieee802_1d_to_ac[tid & 7]; +} + +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. + */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + struct ieee80211_local *local = sdata->local; + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + unsigned long flags; + + ieee80211_stop_queue_agg(sdata, tid); + + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) +{ + ieee80211_wake_queue_agg(sdata, tid); +} + +static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_held(&sta->lock); + + tid_tx = rcu_dereference_protected_tid_tx(sta, tid); + + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ + + ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); + + /* future packets must not find the tid_tx struct any more */ + ieee80211_assign_tid_tx(sta, tid, NULL); + + ieee80211_agg_splice_finish(sta->sdata, tid); + + kfree_rcu(tid_tx, rcu_head); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; + enum ieee80211_ampdu_mlme_action action; int ret; lockdep_assert_held(&sta->ampdu_mlme.mtx); + switch (reason) { + case AGG_STOP_DECLINED: + case AGG_STOP_LOCAL_REQUEST: + case AGG_STOP_PEER_REQUEST: + action = IEEE80211_AMPDU_TX_STOP_CONT; + break; + case AGG_STOP_DESTROY_STA: + action = IEEE80211_AMPDU_TX_STOP_FLUSH; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + spin_lock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -167,10 +284,19 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } - /* if we're already stopping ignore any new requests to stop */ + /* + * if we're already stopping ignore any new requests to stop + * unless we're destroying it in which case notify the driver + */ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { spin_unlock_bh(&sta->lock); - return -EALREADY; + if (reason != AGG_STOP_DESTROY_STA) + return -EALREADY; + ret = drv_ampdu_action(local, sta->sdata, + IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, + &sta->sta, tid, NULL, 0); + WARN_ON_ONCE(ret); + return 0; } if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { @@ -212,11 +338,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ synchronize_net(); - tid_tx->stop_initiator = initiator; - tid_tx->tx_stop = tx; + tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? + WLAN_BACK_RECIPIENT : + WLAN_BACK_INITIATOR; + tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; - ret = drv_ampdu_action(local, sta->sdata, - IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, sta->sdata, action, &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ @@ -227,7 +354,17 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ } - return ret; + /* + * In the case of AGG_STOP_DESTROY_STA, the driver won't + * necessarily call ieee80211_stop_tx_ba_cb(), so this may + * seem like we can leave the tid_tx data pending forever. + * This is true, in a way, but "forever" is only until the + * station struct is actually destroyed. In the meantime, + * leaving it around ensures that we don't transmit packets + * to the driver on this TID which might confuse it. + */ + + return 0; } /* @@ -253,91 +390,18 @@ static void sta_addba_resp_timer_expired(unsigned long data) test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); ht_dbg(sta->sdata, - "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n", + sta->sta.addr, tid); return; } - ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on %pM tid %d\n", + sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); } -static inline int ieee80211_ac_from_tid(int tid) -{ - return ieee802_1d_to_ac[tid & 7]; -} - -/* - * When multiple aggregation sessions on multiple stations - * are being created/destroyed simultaneously, we need to - * refcount the global queue stop caused by that in order - * to not get into a situation where one of the aggregation - * setup or teardown re-enables queues before the other is - * ready to handle that. - * - * These two functions take care of this issue by keeping - * a global "agg_queue_stop" refcount. - */ -static void __acquires(agg_queue) -ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) - ieee80211_stop_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __acquire(agg_queue); -} - -static void __releases(agg_queue) -ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) - ieee80211_wake_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __release(agg_queue); -} - -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - struct ieee80211_local *local = sdata->local; - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - unsigned long flags; - - ieee80211_stop_queue_agg(sdata, tid); - - if (WARN(!tid_tx, - "TID %d gone but expected when splicing aggregates from the pending queue\n", - tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) -{ - ieee80211_wake_queue_agg(sdata, tid); -} - void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -369,7 +433,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) &sta->sta, tid, &start_seq_num, 0); if (ret) { ht_dbg(sdata, - "BA request denied - HW unavailable for tid %d\n", tid); + "BA request denied - HW unavailable for %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -382,7 +447,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -429,7 +495,8 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -465,7 +532,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sdata, - "BA sessions blocked - Denying BA session request\n"); + "BA sessions blocked - Denying BA session request %pM tid %d\n", + sta->sta.addr, tid); return -EINVAL; } @@ -506,8 +574,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, - "BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; } @@ -516,8 +584,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { ht_dbg(sdata, - "BA request denied - session is not idle on tid %u\n", - tid); + "BA request denied - session is not idle on %pM tid %u\n", + sta->sta.addr, tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -572,7 +640,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", + sta->sta.addr, tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -660,14 +729,13 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { int ret; mutex_lock(&sta->ampdu_mlme.mtx); - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx); + ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); mutex_unlock(&sta->ampdu_mlme.mtx); @@ -743,7 +811,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, + "unexpected callback to A-MPDU stop for %pM tid %d\n", + sta->sta.addr, tid); goto unlock_sta; } @@ -751,24 +821,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - /* - * When we get here, the TX path will not be lockless any more wrt. - * aggregation, since the OPERATIONAL bit has long been cleared. - * Thus it will block on getting the lock, if it occurs. So if we - * stop the queue now, we will not get any more packets, and any - * that might be being processed will wait for us here, thereby - * guaranteeing that no packets go to the tid_tx pending queue any - * more. - */ - - ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); - - /* future packets must not find the tid_tx struct any more */ - ieee80211_assign_tid_tx(sta, tid, NULL); - - ieee80211_agg_splice_finish(sta->sdata, tid); - - kfree_rcu(tid_tx, rcu_head); + ieee80211_remove_tid_tx(sta, tid); unlock_sta: spin_unlock_bh(&sta->lock); @@ -819,13 +872,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", + sta->sta.addr, tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", + sta->sta.addr, tid); /* * addba_resp_timer may have fired before we got here, and @@ -835,8 +890,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sta->sdata, - "got addBA resp for tid %d but we already gave up\n", - tid); + "got addBA resp for %pM tid %d but we already gave up\n", + sta->sta.addr, tid); goto out; } @@ -868,8 +923,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - false); + ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 47e0aca614b7..15d886c639e9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,7 +164,17 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sta = sta_info_get(sdata, mac_addr); else sta = sta_info_get_bss(sdata, mac_addr); - if (!sta) { + /* + * The ASSOC test makes sure the driver is ready to + * receive the key. When wpa_supplicant has roamed + * using FT, it attempts to set the key before + * association has completed, this rejects that attempt + * so it will set the key again after assocation. + * + * TODO: accept the key if we have a station entry and + * add it to the device after the station. + */ + if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free(sdata->local, key); err = -ENOENT; goto out_unlock; @@ -510,6 +520,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -521,6 +532,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -930,6 +943,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1010,8 +1024,15 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) kfree_rcu(old_probe_resp, rcu_head); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush(local, vlan); - sta_info_flush(local, sdata); + sta_info_flush_defer(vlan); + sta_info_flush_defer(sdata); + rcu_barrier(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + sta_info_flush_cleanup(vlan); + sta_info_flush_cleanup(sdata); + + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); drv_stop_ap(sdata->local, sdata); @@ -1069,6 +1090,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1086,52 +1159,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. - */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1177,10 +1218,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.aid = params->aid; /* - * FIXME: updating the following information is racy when this - * function is called from ieee80211_change_station(). - * However, all this information should be static so - * maybe we should just reject attemps to change it. + * Some of the following updates would be racy if called on an + * existing station, via ieee80211_change_station(). However, + * all such changes are rejected by cfg80211 except for updates + * changing the supported rates on an existing but not yet used + * TDLS peer. */ if (params->listen_interval >= 0) @@ -1211,18 +1253,33 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { + u32 changed = 0; + switch (params->plink_state) { - case NL80211_PLINK_LISTEN: case NL80211_PLINK_ESTAB: + if (sta->plink_state != NL80211_PLINK_ESTAB) + changed = mesh_plink_inc_estab_count( + sdata); + sta->plink_state = params->plink_state; + break; + case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + case NL80211_PLINK_HOLDING: + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count( + sdata); sta->plink_state = params->plink_state; break; default: /* nothing */ break; } - else + ieee80211_bss_info_change_notify(sdata, changed); + } else { switch (params->plink_action) { case PLINK_ACTION_OPEN: mesh_plink_open(sta); @@ -1231,6 +1288,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, mesh_plink_block(sta); break; } + } #endif } @@ -1265,6 +1323,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -1301,7 +1363,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1309,7 +1370,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) return sta_info_destroy_addr_bss(sdata, mac); - sta_info_flush(local, sdata); + sta_info_flush(sdata); return 0; } @@ -1615,6 +1676,9 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.beacon_int = setup->beacon_interval; + sdata->vif.bss_conf.dtim_period = setup->dtim_period; + return 0; } @@ -1994,7 +2058,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(rate)); + memcpy(sdata->vif.bss_conf.mcast_rate, rate, + sizeof(int) * IEEE80211_NUM_BANDS); return 0; } @@ -2197,7 +2262,8 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 80e55527504b..1bfe0a8b19d2 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -381,7 +381,8 @@ void ieee80211_iter_chan_contexts_atomic( rcu_read_lock(); list_for_each_entry_rcu(ctx, &local->chanctx_list, list) - iter(hw, &ctx->conf, iter_data); + if (ctx->driver_present) + iter(hw, &ctx->conf, iter_data); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_atomic); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 698dc7e6f309..434b3c4f31b5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -207,6 +207,14 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT); + WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE && + changed & ~BSS_CHANGED_IDLE); + check_sdata_in_driver(sdata); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -561,7 +569,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, check_sdata_in_driver(sdata); WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && - sdata->vif.type != NL80211_IFTYPE_ADHOC); + (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) @@ -837,11 +846,12 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, } static inline void drv_rssi_callback(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, const enum ieee80211_rssi_event event) { - trace_drv_rssi_callback(local, event); + trace_drv_rssi_callback(local, sdata, event); if (local->ops->rssi_callback) - local->ops->rssi_callback(&local->hw, event); + local->ops->rssi_callback(&local->hw, &sdata->vif, event); trace_drv_return_void(local); } @@ -913,6 +923,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, if (local->ops->add_chanctx) ret = local->ops->add_chanctx(&local->hw, &ctx->conf); trace_drv_return_int(local, ret); + if (!ret) + ctx->driver_present = true; return ret; } @@ -924,6 +936,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); trace_drv_return_void(local); + ctx->driver_present = false; } static inline void drv_change_chanctx(struct ieee80211_local *local, @@ -931,8 +944,10 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { trace_drv_change_chanctx(local, ctx, changed); - if (local->ops->change_chanctx) + if (local->ops->change_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->change_chanctx(&local->hw, &ctx->conf, changed); + } trace_drv_return_void(local); } @@ -945,10 +960,12 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_assign_vif_chanctx(local, sdata, ctx); - if (local->ops->assign_vif_chanctx) + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); ret = local->ops->assign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_int(local, ret); return ret; @@ -961,10 +978,12 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_unassign_vif_chanctx(local, sdata, ctx); - if (local->ops->unassign_vif_chanctx) + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->unassign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_void(local); } @@ -1003,4 +1022,32 @@ static inline void drv_restart_complete(struct ieee80211_local *local) trace_drv_return_void(local); } +static inline void +drv_set_default_unicast_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx) +{ + check_sdata_in_driver(sdata); + + WARN_ON_ONCE(key_idx < -1 || key_idx > 3); + + trace_drv_set_default_unicast_key(local, sdata, key_idx); + if (local->ops->set_default_unicast_key) + local->ops->set_default_unicast_key(&local->hw, &sdata->vif, + key_idx); + trace_drv_return_void(local); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline void drv_ipv6_addr_change(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct inet6_dev *idev) +{ + trace_drv_ipv6_addr_change(local, sdata); + if (local->ops->ipv6_addr_change) + local->ops->ipv6_addr_change(&local->hw, &sdata->vif, idev); + trace_drv_return_void(local); +} +#endif + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a71d891794a4..61ac7c48ac0c 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -62,6 +62,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); + /* Allow user to disable SGI-20 (SGI-40 is handled above) */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); + /* Allow user to disable the max-AMSDU bit. */ __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); @@ -117,6 +120,21 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40)); + + /* Unset 40 MHz if we're not using a 40 MHz channel */ + switch (sdata->vif.bss_conf.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + ht_cap->cap &= ~IEEE80211_HT_CAP_SGI_40; + ht_cap->cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + break; + } + /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. @@ -179,16 +197,19 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, ht_cap); } -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx) +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason) { int i; cancel_work_sync(&sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx); + __ieee80211_stop_tx_ba_session(sta, i, reason); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, tx); + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } } @@ -245,8 +266,7 @@ void ieee80211_ba_session_work(struct work_struct *work) if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_INITIATOR, - true); + AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -314,8 +334,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else - __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - true); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6b7644e818d8..b4b866f41919 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -67,7 +67,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, sdata->local->hw.extra_tx_headroom); if (!ether_addr_equal(ifibss->bssid, bssid)) - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.bss_conf.ibss_joined) { @@ -191,6 +191,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; @@ -425,11 +426,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -530,7 +529,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (!bss) return; @@ -877,14 +876,21 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static +void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems elems; + BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != + offsetof(typeof(mgmt->u.beacon), variable)); + + /* + * either beacon or probe_resp but the variable field is at the + * same offset + */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; @@ -892,25 +898,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); -} - -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - size_t baselen; - struct ieee802_11_elems elems; - - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -934,12 +922,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); + ieee80211_rx_mgmt_probe_beacon(sdata, mgmt, skb->len, + rx_status); break; case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); @@ -1182,7 +1167,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) memset(ifibss->bssid, 0, ETH_ALEN); ifibss->ssid_len = 0; - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1205,6 +1190,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); synchronize_rcu(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8563b9a5cac3..5fba867d9e2e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -405,6 +405,8 @@ struct ieee80211_mgd_assoc_data { u8 ap_ht_param; + struct ieee80211_vht_cap ap_vht_cap; + size_t ie_len; u8 ie[]; }; @@ -659,10 +661,13 @@ enum ieee80211_sub_if_data_flags { * change handling while the interface is up * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel * mode, so queues are stopped + * @SDATA_STATE_OFFCHANNEL_BEACON_STOPPED: Beaconing was stopped due + * to offchannel, reset when offchannel returns */ enum ieee80211_sdata_state_bits { SDATA_STATE_RUNNING, SDATA_STATE_OFFCHANNEL, + SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, }; /** @@ -685,6 +690,7 @@ struct ieee80211_chanctx { enum ieee80211_chanctx_mode mode; int refcount; + bool driver_present; struct ieee80211_chanctx_conf conf; }; @@ -741,8 +747,6 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; - bool arp_filter_state; - u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; @@ -783,6 +787,11 @@ struct ieee80211_sub_if_data { struct dentry *default_mgmt_key; } debugfs; #endif + +#ifdef CONFIG_PM + struct ieee80211_bss_conf suspend_bss_conf; +#endif + /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; @@ -1118,6 +1127,7 @@ struct ieee80211_local { struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; + struct notifier_block ifa6_notifier; /* * The dynamic ps timeout configured from user space via WEXT - @@ -1346,8 +1356,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon); + struct ieee80211_channel *channel); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); @@ -1358,10 +1367,8 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, - bool offchannel_ps_enable); -void ieee80211_offchannel_return(struct ieee80211_local *local, - bool offchannel_ps_disable); +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); +void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); @@ -1422,7 +1429,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx); +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); @@ -1436,11 +1444,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); void ieee80211_ba_session_work(struct work_struct *work); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8be854e86cd9..0a36dc6346bb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -747,7 +747,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; - int i; + int i, flushed; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -772,11 +772,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * This is relevant only in AP, WDS and mesh modes, since in - * all other modes we've already removed all stations when - * disconnecting etc. + * This is relevant only in WDS mode, in all other modes we've + * already removed all stations when disconnecting or similar, + * so warn otherwise. + * + * We call sta_info_flush_cleanup() later, to combine RCU waits. */ - sta_info_flush(local, sdata); + flushed = sta_info_flush_defer(sdata); + WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || + (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); /* * Don't count this interface for promisc/allmulti while it @@ -859,11 +863,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. + * + * sta_info_flush_cleanup() requires rcu_barrier() + * first to wait for the station call_rcu() calls + * to complete, here we need at least sychronize_rcu() + * it to wait for the RX path in case it is using the + * interface and enqueuing frames at this very time on + * another CPU. */ - synchronize_rcu(); + rcu_barrier(); + sta_info_flush_cleanup(sdata); + skb_queue_purge(&sdata->skb_queue); /* @@ -961,7 +971,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -977,7 +986,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - flushed = sta_info_flush(local, sdata); + flushed = sta_info_flush(sdata); WARN_ON(flushed); } @@ -1218,6 +1227,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; @@ -1225,9 +1235,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.p2p = true; /* fall through */ case NL80211_IFTYPE_STATION: + sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; case NL80211_IFTYPE_ADHOC: + sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); break; case NL80211_IFTYPE_MESH_POINT: @@ -1241,8 +1253,12 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: + sdata->vif.bss_conf.bssid = NULL; + break; case NL80211_IFTYPE_AP_VLAN: + break; case NL80211_IFTYPE_P2P_DEVICE: + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1558,9 +1574,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; -#ifdef CONFIG_INET - sdata->arp_filter_state = true; -#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 619c5d697999..ef252eb58c36 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -204,8 +204,11 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = key_mtx_dereference(sdata->local, sdata->keys[idx]); - if (uni) + if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + drv_set_default_unicast_key(sdata->local, sdata, idx); + } + if (multi) rcu_assign_pointer(sdata->default_multicast_key, key); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1b087fff93e7..2bdd454e8bcf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -23,6 +23,7 @@ #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> +#include <net/addrconf.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -207,76 +208,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { struct ieee80211_local *local = sdata->local; - static const u8 zero[ETH_ALEN] = { 0 }; if (!changed) return; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; - } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_AP) - sdata->vif.bss_conf.bssid = sdata->vif.addr; - else if (sdata->vif.type == NL80211_IFTYPE_WDS) - sdata->vif.bss_conf.bssid = NULL; - else if (ieee80211_vif_is_mesh(&sdata->vif)) { - sdata->vif.bss_conf.bssid = zero; - } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { - sdata->vif.bss_conf.bssid = sdata->vif.addr; - WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), - "P2P Device BSS changed %#x", changed); - } else { - WARN_ON(1); - return; - } - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: - break; - default: - /* do not warn to simplify caller in scan.c */ - changed &= ~BSS_CHANGED_BEACON_ENABLED; - if (WARN_ON(changed & BSS_CHANGED_BEACON)) - return; - break; - } - - if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { - sdata->vif.bss_conf.enable_beacon = false; - } else { - /* - * Beacon should be enabled, but AP mode must - * check whether there is a beacon configured. - */ - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ap.beacon; - break; - case NL80211_IFTYPE_ADHOC: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ibss.presp; - break; -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.mesh.mesh_id_len; - break; -#endif - default: - /* not reached */ - WARN_ON(1); - break; - } - } - } - drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); } @@ -415,27 +350,19 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; - while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { - bss_conf->arp_addr_list[c] = ifa->ifa_address; + while (ifa) { + if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) + bss_conf->arp_addr_list[c] = ifa->ifa_address; ifa = ifa->ifa_next; c++; } - /* If not all addresses fit the list, disable filtering */ - if (ifa) { - sdata->arp_filter_state = false; - c = 0; - } else { - sdata->arp_filter_state = true; - } bss_conf->arp_addr_cnt = c; /* Configure driver only if associated (which also implies it is up) */ - if (ifmgd->associated) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (ifmgd->associated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - } mutex_unlock(&ifmgd->mtx); @@ -443,6 +370,37 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +#if IS_ENABLED(CONFIG_IPV6) +static int ieee80211_ifa6_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)arg; + struct inet6_dev *idev = ifa->idev; + struct net_device *ndev = ifa->idev->dev; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, ifa6_notifier); + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; + + /* Make sure it's our interface that got changed */ + if (!wdev || wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + + /* + * For now only support station mode. This is mostly because + * doing AP would have to handle AP_VLAN in some way ... + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + drv_ipv6_addr_change(local, sdata, idev); + + return NOTIFY_DONE; +} +#endif + static int ieee80211_napi_poll(struct napi_struct *napi, int budget) { struct ieee80211_local *local = @@ -537,6 +495,7 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | + IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, @@ -606,7 +565,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -1049,12 +1009,25 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif +#if IS_ENABLED(CONFIG_IPV6) + local->ifa6_notifier.notifier_call = ieee80211_ifa6_changed; + result = register_inet6addr_notifier(&local->ifa6_notifier); + if (result) + goto fail_ifa6; +#endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, local->hw.napi_weight); return 0; +#if IS_ENABLED(CONFIG_IPV6) + fail_ifa6: #ifdef CONFIG_INET + unregister_inetaddr_notifier(&local->ifa_notifier); +#endif +#endif +#if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); @@ -1090,6 +1063,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&local->ifa6_notifier); +#endif rtnl_lock(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 649ad513547f..694e27376afa 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -20,16 +20,11 @@ int mesh_allocated; static struct kmem_cache *rm_cache; -#ifdef CONFIG_MAC80211_MESH bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { return (mgmt->u.action.u.mesh_action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION); } -#else -bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) -{ return false; } -#endif void ieee80211s_init(void) { @@ -607,6 +602,12 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; + u32 changed = BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -624,15 +625,16 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; - sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(sdata->local, - ieee80211_get_sdata_band(sdata)); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT); + ieee80211_mandatory_rates(local, band); + + if (band == IEEE80211_BAND_5GHZ) { + sdata->vif.bss_conf.use_short_slot = true; + changed |= BSS_CHANGED_ERP_SLOT; + } + + ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); } @@ -646,10 +648,12 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) /* stop the beacon */ ifmsh->mesh_id_len = 0; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); mesh_path_flush_by_iface(sdata); del_timer_sync(&sdata->u.mesh.housekeeping_timer); @@ -805,6 +809,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + static u8 zero_addr[ETH_ALEN] = {}; setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, @@ -830,4 +835,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) INIT_LIST_HEAD(&ifmsh->preq_queue.list); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); + + sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 84c28c6101cd..aff301544c7f 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -191,8 +191,6 @@ struct mesh_rmc { #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ - #define MESH_PATH_EXPIRE (600 * HZ) /* Default maximum number of plinks per interface */ @@ -307,6 +305,20 @@ extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH extern int mesh_allocated; +static inline +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_inc(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} + +static inline +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_dec(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} + static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { return sdata->u.mesh.mshcfg.dot11MeshMaxPeerLinks - diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 47aeee2d8db1..6b4603a90031 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -215,16 +215,19 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, skb->priority = 7; info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); } /** - * mesh_send_path error - Sends a PERR mesh management frame + * mesh_path_error_tx - Sends a PERR mesh management frame * + * @ttl: allowed remaining hops * @target: broken destination * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * @sdata: local mesh subif * * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the @@ -246,11 +249,13 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, return -EAGAIN; skb = dev_alloc_skb(local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom); + skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -350,6 +355,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) + * @action: type of hwmp ie * * This function updates the path routing information to the originator and the * transmitter of a HWMP PREQ or PREP frame. diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4b274e9c91a5..81e612682bc3 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -41,20 +41,6 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, u8 *da, __le16 llid, __le16 plid, __le16 reason); -static inline -u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} - -static inline -u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} - /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine * @@ -69,30 +55,6 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) sta->plink_retries = 0; } -/* - * Allocate mesh sta entry and insert into station table - */ -static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr) -{ - struct sta_info *sta; - - if (sdata->local->num_sta >= MESH_MAX_PLINKS) - return NULL; - - sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); - if (!sta) - return NULL; - - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); - - set_sta_flag(sta, WLAN_STA_WME); - - return sta; -} - /** * mesh_set_ht_prot_mode - set correct HT protection mode * @@ -323,53 +285,27 @@ free: return err; } -/** - * mesh_peer_init - initialize new mesh peer and return resulting sta_info - * - * @sdata: local meshif - * @addr: peer's address - * @elems: IEs from beacon or mesh peering frame - * - * call under RCU - */ -static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, - u8 *addr, - struct ieee802_11_elems *elems) +static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; - u32 rates, basic_rates = 0; - struct sta_info *sta; - bool insert = false; + u32 rates, basic_rates = 0, changed = 0; sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates); - sta = sta_info_get(sdata, addr); - if (!sta) { - /* Userspace handles peer allocation when security is enabled */ - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) { - cfg80211_notify_new_peer_candidate(sdata->dev, addr, - elems->ie_start, - elems->total_len, - GFP_ATOMIC); - return NULL; - } - - sta = mesh_plink_alloc(sdata, addr); - if (!sta) - return NULL; - insert = true; - } - spin_lock_bh(&sta->lock); sta->last_rx = jiffies; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - spin_unlock_bh(&sta->lock); - return sta; - } + /* rates and capabilities don't change during peering */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + goto out; + + if (sta->sta.supp_rates[band] != rates) + changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.supp_rates[band] = rates; if (elems->ht_cap_elem && sdata->vif.bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) @@ -388,27 +324,115 @@ static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, elems->ht_operation, &chandef); + if (sta->ch_width != chandef.width) + changed |= IEEE80211_RC_BW_CHANGED; sta->ch_width = chandef.width; } if (insert) rate_control_rate_init(sta); + else + rate_control_rate_update(local, sband, sta, changed); +out: spin_unlock_bh(&sta->lock); +} - if (insert && sta_info_insert(sta)) +static struct sta_info * +__mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) +{ + struct sta_info *sta; + + if (sdata->local->num_sta >= MESH_MAX_PLINKS) return NULL; + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); + if (!sta) + return NULL; + + sta->plink_state = NL80211_PLINK_LISTEN; + init_timer(&sta->plink_timer); + + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + + set_sta_flag(sta, WLAN_STA_WME); + + return sta; +} + +static struct sta_info * +mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, + struct ieee802_11_elems *elems) +{ + struct sta_info *sta = NULL; + + /* Userspace handles peer allocation when security is enabled */ + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) + cfg80211_notify_new_peer_candidate(sdata->dev, addr, + elems->ie_start, + elems->total_len, + GFP_KERNEL); + else + sta = __mesh_sta_info_alloc(sdata, addr); + return sta; } +/* + * mesh_sta_info_get - return mesh sta info entry for @addr. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame. + * + * Return existing or newly allocated sta_info under RCU read lock. + * (re)initialize with given IEs. + */ +static struct sta_info * +mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, + u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU) +{ + struct sta_info *sta = NULL; + + rcu_read_lock(); + sta = sta_info_get(sdata, addr); + if (sta) { + mesh_sta_info_init(sdata, sta, elems, false); + } else { + rcu_read_unlock(); + /* can't run atomic */ + sta = mesh_sta_info_alloc(sdata, addr, elems); + if (!sta) { + rcu_read_lock(); + return NULL; + } + + mesh_sta_info_init(sdata, sta, elems, true); + + if (sta_info_insert_rcu(sta)) + return NULL; + } + + return sta; +} + +/* + * mesh_neighbour_update - update or initialize new mesh neighbor. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame + * + * Initiates peering if appropriate. + */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *elems) { struct sta_info *sta; - rcu_read_lock(); - sta = mesh_peer_init(sdata, hw_addr, elems); + sta = mesh_sta_info_get(sdata, hw_addr, elems); if (!sta) goto out; @@ -646,6 +670,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); + /* WARNING: Only for sta pointer, is dropped & re-acquired */ rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -749,8 +774,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (event == OPN_ACPT) { + rcu_read_unlock(); /* allocate sta entry if necessary and update info */ - sta = mesh_peer_init(sdata, mgmt->sa, &elems); + sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); if (!sta) { mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a3552929a21d..72924399077e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -199,11 +199,11 @@ static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, case NL80211_CHAN_WIDTH_40: if (sdata->vif.bss_conf.chandef.chan->center_freq > sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40PLUS) + chan->flags & IEEE80211_CHAN_NO_HT40MINUS) disable_40 = true; if (sdata->vif.bss_conf.chandef.chan->center_freq < sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40MINUS) + chan->flags & IEEE80211_CHAN_NO_HT40PLUS) disable_40 = true; break; default: @@ -341,11 +341,13 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_supported_band *sband) + struct ieee80211_supported_band *sband, + struct ieee80211_vht_cap *ap_vht_cap) { u8 *pos; u32 cap; struct ieee80211_sta_vht_cap vht_cap; + int i; BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); @@ -364,6 +366,42 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } + /* + * Some APs apparently get confused if our capabilities are better + * than theirs, so restrict what we advertise in the assoc request. + */ + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) + cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC))) + cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4); + + for (i = 0; i < 8; i++) { + int shift = i * 2; + u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift; + u16 ap_mcs, our_mcs; + + ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) & + mask) >> shift; + our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) & + mask) >> shift; + + switch (ap_mcs) { + default: + if (our_mcs <= ap_mcs) + break; + /* fall through */ + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask); + vht_cap.vht_mcs.rx_mcs_map |= + cpu_to_le16(ap_mcs << shift); + } + } + /* reserve and fill IE */ pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); @@ -562,7 +600,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) sband, chan, sdata->smps_mode); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) - ieee80211_add_vht_ie(sdata, skb, sband); + ieee80211_add_vht_ie(sdata, skb, sband, + &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { @@ -1426,10 +1465,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_CQM; /* Enable ARP filtering */ - if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (bss_conf->arp_addr_cnt) bss_info_changed |= BSS_CHANGED_ARP_FILTER; - } ieee80211_bss_info_change_notify(sdata, bss_info_changed); @@ -1450,7 +1487,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; u32 changed = 0; ASSERT_MGD_MTX(ifmgd); @@ -1482,14 +1518,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) { - set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); - } - mutex_unlock(&local->sta_mtx); - /* * if we want to get out of ps before disassoc (why?) we have * to do it before sending disassoc, as otherwise the null-packet @@ -1521,7 +1549,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush(local, sdata); + sta_info_flush_defer(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1543,10 +1571,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&local->dynamic_ps_enable_work); /* Disable ARP filtering */ - if (sdata->vif.bss_conf.arp_filter_enabled) { - sdata->vif.bss_conf.arp_filter_enabled = false; + if (sdata->vif.bss_conf.arp_addr_cnt) changed |= BSS_CHANGED_ARP_FILTER; - } sdata->vif.bss_conf.qos = false; changed |= BSS_CHANGED_QOS; @@ -2369,8 +2395,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -2404,7 +2429,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); @@ -2447,7 +2472,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) @@ -2528,8 +2553,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); ifmgd->assoc_data->have_beacon = true; ifmgd->assoc_data->sent_assoc = false; /* continue assoc process */ @@ -2571,12 +2595,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sig > ifmgd->rssi_max_thold && (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_HIGH); + drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH); } else if (sig < ifmgd->rssi_min_thold && (last_sig >= ifmgd->rssi_max_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_LOW); + drv_rssi_callback(local, sdata, RSSI_EVENT_LOW); } } @@ -2682,8 +2706,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len)) @@ -3133,23 +3156,22 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (!ifmgd->associated) + mutex_lock(&ifmgd->mtx); + if (!ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); return; + } if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { - mlme_dbg(sdata, - "driver requested disconnect after resume\n"); - ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, - WLAN_REASON_UNSPECIFIED); - mutex_unlock(&ifmgd->mtx); - return; - } + mlme_dbg(sdata, "driver requested disconnect after resume\n"); + ieee80211_sta_connection_lost(sdata, + ifmgd->associated->bssid, + WLAN_REASON_UNSPECIFIED); mutex_unlock(&ifmgd->mtx); + return; } + mutex_unlock(&ifmgd->mtx); if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) add_timer(&ifmgd->timer); @@ -3400,6 +3422,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; +out: while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { @@ -3408,14 +3431,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, goto out; } - ret = chandef_downgrade(chandef); + ret |= chandef_downgrade(chandef); } if (chandef->width != vht_chandef.width) sdata_info(sdata, - "local regulatory prevented using AP HT/VHT configuration, downgraded\n"); + "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); -out: WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); return ret; } @@ -3529,8 +3551,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); - while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); + ret = ieee80211_vif_use_channel(sdata, &chandef, + IEEE80211_CHANCTX_SHARED); + } return ret; } @@ -3756,7 +3781,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_mgd_assoc_data *assoc_data; struct ieee80211_supported_band *sband; - const u8 *ssidie, *ht_ie; + const u8 *ssidie, *ht_ie, *vht_ie; int i, err; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); @@ -3875,6 +3900,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; else ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, + sizeof(struct ieee80211_vht_cap)); + else + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index a5379aea7d09..82baf5b6ecf4 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -102,8 +102,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, - bool offchannel_ps_enable) +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -126,16 +125,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); /* Check to see if we should disable beaconing. */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.bss_conf.enable_beacon) { + set_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state); + sdata->vif.bss_conf.enable_beacon = false; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); + } if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { netif_tx_stop_all_queues(sdata->dev); - if (offchannel_ps_enable && - (sdata->vif.type == NL80211_IFTYPE_STATION) && + if (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.associated) ieee80211_offchannel_ps_enable(sdata); } @@ -143,8 +143,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_return(struct ieee80211_local *local, - bool offchannel_ps_disable) +void ieee80211_offchannel_return(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -163,11 +162,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, continue; /* Tell AP we're back */ - if (offchannel_ps_disable && - sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.associated) - ieee80211_offchannel_ps_disable(sdata); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_disable(sdata); if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { /* @@ -183,11 +180,12 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (test_and_clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state)) { + sdata->vif.bss_conf.enable_beacon = true; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); + } } mutex_unlock(&local->iflist_mtx); } @@ -385,7 +383,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) local->tmp_channel = NULL; ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, true); + ieee80211_offchannel_return(local); } ieee80211_recalc_idle(local); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 79a48f37d409..e45b83610e85 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -7,25 +7,23 @@ #include "led.h" /* return value indicates whether the driver should be further notified */ -static bool ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) +static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) { switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_MESH_POINT: ieee80211_mesh_quiesce(sdata); - return true; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* don't tell driver about this */ - return false; + break; default: - return true; + break; } + + cancel_work_sync(&sdata->work); } int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) @@ -44,7 +42,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&local->sta_mtx); } @@ -94,10 +93,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(err != 1); local->wowlan = false; } else { - list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); - ieee80211_quiesce(sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) + if (ieee80211_sdata_running(sdata)) + ieee80211_quiesce(sdata); goto suspend; } } @@ -124,17 +122,43 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); + static u8 zero_addr[ETH_ALEN] = {}; + u32 changed = 0; - if (!ieee80211_quiesce(sdata)) + if (!ieee80211_sdata_running(sdata)) continue; - if (!ieee80211_sdata_running(sdata)) + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* skip these */ continue; + case NL80211_IFTYPE_STATION: + if (sdata->vif.bss_conf.assoc) + changed = BSS_CHANGED_ASSOC | + BSS_CHANGED_BSSID | + BSS_CHANGED_IDLE; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + if (sdata->vif.bss_conf.enable_beacon) + changed = BSS_CHANGED_BEACON_ENABLED; + break; + default: + break; + } + + ieee80211_quiesce(sdata); + + sdata->suspend_bss_conf = sdata->vif.bss_conf; + memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf)); + sdata->vif.bss_conf.idle = true; + if (sdata->suspend_bss_conf.bssid) + sdata->vif.bss_conf.bssid = zero_addr; - /* disable beaconing */ - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); + /* disable beaconing or remove association */ + ieee80211_bss_info_change_notify(sdata, changed); if (sdata->vif.type == NL80211_IFTYPE_AP && rcu_access_pointer(sdata->u.ap.beacon)) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 580704eba8b8..a19089565c4b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2353,7 +2353,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - /* verify action & smps_control are present */ + /* verify action & smps_control/chanwidth are present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; @@ -2392,6 +2392,35 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) IEEE80211_RC_SMPS_CHANGED); goto handled; } + case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { + struct ieee80211_supported_band *sband; + u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + bool old_40mhz, new_40mhz; + + /* If it doesn't support 40 MHz it can't change ... */ + if (!rx->sta->supports_40mhz) + goto handled; + + old_40mhz = rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40; + new_40mhz = chanwidth == IEEE80211_HT_CHANWIDTH_ANY; + + if (old_40mhz == new_40mhz) + goto handled; + + if (new_40mhz) + rx->sta->sta.ht_cap.cap |= + IEEE80211_HT_CAP_SUP_WIDTH_20_40; + else + rx->sta->sta.ht_cap.cap &= + ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_BW_CHANGED); + goto handled; + } default: goto invalid; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index d59fc6818b1c..607684c47d55 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -65,12 +65,11 @@ static bool is_uapsd_supported(struct ieee802_11_elems *elems) struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon) + struct ieee80211_channel *channel) { + bool beacon = ieee80211_is_beacon(mgmt->frame_control); struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; @@ -203,7 +202,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, &elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); } @@ -292,7 +291,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (!was_hw_scan) { ieee80211_configure_filter(local); drv_sw_scan_complete(local); - ieee80211_offchannel_return(local, true); + ieee80211_offchannel_return(local); } ieee80211_recalc_idle(local); @@ -341,7 +340,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - ieee80211_offchannel_stop_vifs(local, true); + ieee80211_offchannel_stop_vifs(local); ieee80211_configure_filter(local); @@ -678,12 +677,8 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local, local->scan_channel = NULL; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - /* - * Re-enable vifs and beaconing. Leave PS - * in off-channel state..will put that back - * on-channel at the end of scanning. - */ - ieee80211_offchannel_return(local, false); + /* disable PS */ + ieee80211_offchannel_return(local); *next_delay = HZ / 5; /* afterwards, resume scan & go to next channel */ @@ -693,8 +688,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local, static void ieee80211_scan_state_resume(struct ieee80211_local *local, unsigned long *next_delay) { - /* PS already is in off-channel mode */ - ieee80211_offchannel_stop_vifs(local, false); + ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { drv_flush(local, false); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ca9fde198188..227233c3ff7f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -104,6 +104,16 @@ static void cleanup_single_sta(struct sta_info *sta) * neither mac80211 nor the driver can reference this * sta struct any more except by still existing timers * associated with this station that we clean up below. + * + * Note though that this still uses the sdata and even + * calls the driver in AP and mesh mode, so interfaces + * of those types mush use call sta_info_flush_cleanup() + * (typically via sta_info_flush()) before deconfiguring + * the driver. + * + * In station mode, nothing happens here so it doesn't + * have to (and doesn't) do that, this is intentional to + * speed up roaming. */ if (test_sta_flag(sta, WLAN_STA_PS_STA)) { @@ -370,11 +380,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); -#ifdef CONFIG_MAC80211_MESH - sta->plink_state = NL80211_PLINK_LISTEN; - init_timer(&sta->plink_timer); -#endif - return sta; } @@ -774,7 +779,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); if (ret) @@ -885,20 +890,12 @@ void sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - sta_info_flush(local, NULL); } -/** - * sta_info_flush - flush matching STA entries from the STA table - * - * Returns the number of removed STA entries. - * - * @local: local interface data - * @sdata: matching rule for the net device (sta->dev) or %NULL to match all STAs - */ -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; int ret = 0; @@ -906,30 +903,22 @@ int sta_info_flush(struct ieee80211_local *local, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { + if (sdata == sta->sdata) { WARN_ON(__sta_info_destroy(sta)); ret++; } } mutex_unlock(&local->sta_mtx); - rcu_barrier(); - - if (sdata) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } else { - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } - mutex_unlock(&local->iflist_mtx); - } - return ret; } +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); +} + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 37c1889afd3a..af7d78aa5523 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -92,6 +92,13 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 +enum ieee80211_agg_stop_reason { + AGG_STOP_DECLINED, + AGG_STOP_LOCAL_REQUEST, + AGG_STOP_PEER_REQUEST, + AGG_STOP_DESTROY_STA, +}; + /** * struct tid_ampdu_tx - TID aggregation information (Tx). * @@ -548,8 +555,39 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush_cleanup - flush the sta_info cleanup queue + * @sdata: the interface + * + * Flushes the sta_info cleanup queue for a given interface; + * this is necessary before the interface is removed or, for + * AP/mesh interfaces, before it is deconfigured. + * + * Note an rcu_barrier() must precede the function, after all + * stations have been flushed/removed to ensure the call_rcu() + * calls that add stations to the cleanup queue have completed. + */ +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush - flush matching STA entries from the STA table + * + * Returns the number of removed STA entries. + * + * @sdata: sdata to remove all stations from + */ +static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) +{ + int ret = sta_info_flush_defer(sdata); + + rcu_barrier(); + sta_info_flush_cleanup(sdata); + + return ret; +} + void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a8270b441a6f..6ca53d64cb28 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,21 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANCTX_ENTRY __field(u32, control_freq) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ __field(u32, chan_width) \ __field(u32, center_freq1) \ - __field(u32, center_freq2) \ + __field(u32, center_freq2) +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan->center_freq; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ + __entry->center_freq1 = (c)->center_freq2; +#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ + __entry->center_freq1, __entry->center_freq2 + +#define CHANCTX_ENTRY CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN __entry->control_freq = ctx->conf.def.chan->center_freq;\ - __entry->chan_width = ctx->conf.def.width; \ - __entry->center_freq1 = ctx->conf.def.center_freq1; \ - __entry->center_freq2 = ctx->conf.def.center_freq2; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT " control:%d MHz width:%d center: %d/%d MHz chains:%d/%d" -#define CHANCTX_PR_ARG __entry->control_freq, __entry->chan_width, \ - __entry->center_freq1, __entry->center_freq2, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -341,8 +347,11 @@ TRACE_EVENT(drv_bss_info_changed, __field(s32, cqm_rssi_hyst); __field(u32, channel_width); __field(u32, channel_cfreq1); - __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt); - __field(bool, arp_filter_enabled); + __dynamic_array(u32, arp_addr_list, + info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt); + __field(int, arp_addr_cnt); __field(bool, qos); __field(bool, idle); __field(bool, ps); @@ -378,9 +387,11 @@ TRACE_EVENT(drv_bss_info_changed, __entry->cqm_rssi_hyst = info->cqm_rssi_hyst; __entry->channel_width = info->chandef.width; __entry->channel_cfreq1 = info->chandef.center_freq1; + __entry->arp_addr_cnt = info->arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list, - sizeof(u32) * info->arp_addr_cnt); - __entry->arp_filter_enabled = info->arp_filter_enabled; + sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt)); __entry->qos = info->qos; __entry->idle = info->idle; __entry->ps = info->ps; @@ -1178,23 +1189,26 @@ TRACE_EVENT(drv_set_rekey_data, TRACE_EVENT(drv_rssi_callback, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, enum ieee80211_rssi_event rssi_event), - TP_ARGS(local, rssi_event), + TP_ARGS(local, sdata, rssi_event), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u32, rssi_event) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->rssi_event = rssi_event; ), TP_printk( - LOCAL_PR_FMT " rssi_event:%d", - LOCAL_PR_ARG, __entry->rssi_event + LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event ) ); @@ -1426,6 +1440,14 @@ DEFINE_EVENT(local_only_evt, drv_restart_complete, TP_ARGS(local) ); +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); +#endif + /* * Tracing for API calls that drivers call. */ @@ -1815,6 +1837,29 @@ TRACE_EVENT(stop_queue, ) ); +TRACE_EVENT(drv_set_default_unicast_key, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx), + + TP_ARGS(local, sdata, key_idx), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, key_idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->key_idx = key_idx; + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx) +); + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e9eadc40c09c..a2cb6a302cc7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1673,10 +1673,13 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, chanctx_conf = rcu_dereference(tmp_sdata->vif.chanctx_conf); } - if (!chanctx_conf) - goto fail_rcu; - chan = chanctx_conf->def.chan; + if (chanctx_conf) + chan = chanctx_conf->def.chan; + else if (!local->use_chanctx) + chan = local->_oper_channel; + else + goto fail_rcu; /* * Frame injection is not allowed if beaconing is not allowed @@ -1784,16 +1787,16 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; /* fall through */ case NL80211_IFTYPE_AP: + if (sdata->vif.type == NL80211_IFTYPE_AP) + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) + goto fail_rcu; fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; - if (sdata->vif.type == NL80211_IFTYPE_AP) - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) - goto fail_rcu; band = chanctx_conf->def.chan->band; break; case NL80211_IFTYPE_WDS: @@ -2261,9 +2264,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, - struct sk_buff *skb) +static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) { u8 *pos, *tim; int aid0 = 0; @@ -2325,6 +2327,31 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } } +static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + + /* + * Not very nice, but we want to allow the driver to call + * ieee80211_beacon_get() as a response to the set_tim() + * callback. That, however, is already invoked under the + * sta_lock to guarantee consistent and race-free update + * of the tim bitmap in mac80211 and the driver. + */ + if (local->tim_in_locked_section) { + __ieee80211_beacon_add_tim(sdata, ps, skb); + } else { + unsigned long flags; + + spin_lock_irqsave(&local->tim_lock, flags); + __ieee80211_beacon_add_tim(sdata, ps, skb); + spin_unlock_irqrestore(&local->tim_lock, flags); + } + + return 0; +} + struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 *tim_offset, u16 *tim_length) @@ -2369,22 +2396,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - /* - * Not very nice, but we want to allow the driver to call - * ieee80211_beacon_get() as a response to the set_tim() - * callback. That, however, is already invoked under the - * sta_lock to guarantee consistent and race-free update - * of the tim bitmap in mac80211 and the driver. - */ - if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - } else { - unsigned long flags; - - spin_lock_irqsave(&local->tim_lock, flags); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - spin_unlock_irqrestore(&local->tim_lock, flags); - } + ieee80211_beacon_add_tim(sdata, &ap->ps, skb); if (tim_offset) *tim_offset = beacon->head_len; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f11e8c540db4..7519018ff71a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1358,6 +1358,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; struct sta_info *sta; int res, i; + bool reconfig_due_to_wowlan = false; #ifdef CONFIG_PM if (local->suspended) @@ -1377,6 +1378,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * res is 1, which means the driver requested * to go through a regular reset on wakeup. */ + reconfig_due_to_wowlan = true; } #endif /* everything else happens only if HW was up & running */ @@ -1526,6 +1528,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_IDLE | BSS_CHANGED_TXPOWER; +#ifdef CONFIG_PM + if (local->resuming && !reconfig_due_to_wowlan) + sdata->vif.bss_conf = sdata->suspend_bss_conf; +#endif + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | @@ -1550,9 +1557,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* fall through */ case NL80211_IFTYPE_MESH_POINT: - changed |= BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED; - ieee80211_bss_info_change_notify(sdata, changed); + if (sdata->vif.bss_conf.enable_beacon) { + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; + ieee80211_bss_info_change_notify(sdata, changed); + } break; case NL80211_IFTYPE_WDS: break; @@ -1632,7 +1641,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } @@ -1646,10 +1656,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended) { + if (!local->suspended || reconfig_due_to_wowlan) drv_restart_complete(local); + + if (!local->suspended) return 0; - } #ifdef CONFIG_PM /* first set suspended false, then resuming */ @@ -1864,7 +1875,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, - u32 cap) + u32 cap) { __le32 tmp; diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 199b92261e94..d20c6d3c247d 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -41,7 +41,7 @@ static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) return -EINVAL; *val = skb->data[0]; - skb_pull(skb, 1); + skb_pull(skb, 1); return 0; } @@ -137,16 +137,12 @@ static int mac802154_header_create(struct sk_buff *skb, struct ieee802154_addr dev_addr; struct mac802154_sub_if_data *priv = netdev_priv(dev); int pos = 2; - u8 *head; + u8 head[MAC802154_FRAME_HARD_HEADER_LEN]; u16 fc; if (!daddr) return -EINVAL; - head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; - head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); @@ -210,7 +206,6 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); - kfree(head); return pos; } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5c0b78528e55..b7d4cb475ae6 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -234,7 +234,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6283351f4eeb..d8f77bacae86 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -284,7 +284,7 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, const struct hash_ipport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6a21271c8d5a..1da1e955f38b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -294,8 +294,8 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2d5cd4ee30eb..f2627226a087 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -388,8 +388,8 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, const struct hash_ipportnet6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 29e94b981f3f..4b677cf6bf7d 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -286,7 +286,7 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1, const struct hash_net6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr; } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 45a101439bc5..6ba985f1c96f 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -471,7 +471,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7ef700de596c..af20c0c5ced2 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -350,7 +350,7 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1, const struct hash_netport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto && ip1->cidr == ip2->cidr; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 30e764ad021f..68e368a4beed 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -796,8 +796,7 @@ static void ip_vs_conn_expire(unsigned long data) */ if (likely(atomic_read(&cp->refcnt) == 1)) { /* delete the timer if it is activated by other users */ - if (timer_pending(&cp->timer)) - del_timer(&cp->timer); + del_timer(&cp->timer); /* does anybody control me? */ if (cp->control) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 746048b13ef3..ae8ec6f27688 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -61,14 +61,27 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 1; } +static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, + unsigned int sctphoff) +{ + __u32 crc32; + struct sk_buff *iter; + + crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff); + skb_walk_frags(skb, iter) + crc32 = sctp_update_cksum((u8 *) iter->data, + skb_headlen(iter), crc32); + sctph->checksum = sctp_end_cksum(crc32); + + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + static int sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -92,13 +105,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->source = cp->vport; - /* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } @@ -109,8 +116,6 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -134,13 +139,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->dest = cp->dport; - /* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index effa10c9e4e3..44fd10c539ac 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1795,6 +1795,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) GFP_KERNEL); if (!tinfo->buf) goto outtinfo; + } else { + tinfo->buf = NULL; } tinfo->id = id; diff --git a/net/nfc/core.c b/net/nfc/core.c index aa64ea441676..25522e56d350 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -338,7 +338,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; - if (dev->ops->check_presence) + if (dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } @@ -429,7 +429,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); - if (!rc && dev->ops->check_presence) + if (!rc && dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { @@ -684,11 +684,6 @@ static void nfc_release(struct device *d) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - if (dev->ops->check_presence) { - del_timer_sync(&dev->check_pres_timer); - cancel_work_sync(&dev->check_pres_work); - } - nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); kfree(dev); @@ -706,15 +701,16 @@ static void nfc_check_pres_work(struct work_struct *work) rc = dev->ops->check_presence(dev, dev->active_target); if (rc == -EOPNOTSUPP) goto exit; - if (!rc) { - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); - } else { + if (rc) { u32 active_target_idx = dev->active_target->idx; device_unlock(&dev->dev); nfc_target_lost(dev, active_target_idx); return; } + + if (!dev->shutting_down) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } exit: @@ -761,6 +757,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -778,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; @@ -853,26 +852,27 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; - mutex_lock(&nfc_devlist_mutex); - nfc_devlist_generation++; - - /* lock to avoid unregistering a device while an operation - is in progress */ - device_lock(&dev->dev); - device_del(&dev->dev); - device_unlock(&dev->dev); + if (dev->ops->check_presence) { + device_lock(&dev->dev); + dev->shutting_down = true; + device_unlock(&dev->dev); + del_timer_sync(&dev->check_pres_timer); + cancel_work_sync(&dev->check_pres_work); + } - mutex_unlock(&nfc_devlist_mutex); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); nfc_llcp_unregister_device(dev); - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was removed\n", - dev_name(&dev->dev)); + mutex_lock(&nfc_devlist_mutex); + nfc_devlist_generation++; + device_del(&dev->dev); + mutex_unlock(&nfc_devlist_mutex); ida_simple_remove(&nfc_index_ida, id); - } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 7d99410e6c1a..64f922be9281 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -280,14 +280,19 @@ static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) { u8 param[2]; + size_t param_len = 2; /* TODO: Find out what the identity reference data is * and fill param with it. HCI spec 6.1.3.5 */ pr_debug("\n"); + if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) + param_len = 0; + return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, - NFC_HCI_ADM_CLEAR_ALL_PIPE, param, 2, NULL); + NFC_HCI_ADM_CLEAR_ALL_PIPE, param, param_len, + NULL); } int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7bea574d5934..91020b210d87 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -57,6 +57,8 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) int r = 0; mutex_lock(&hdev->msg_tx_mutex); + if (hdev->shutting_down) + goto exit; if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { @@ -295,6 +297,12 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, goto exit; } + if (hdev->ops->event_received) { + r = hdev->ops->event_received(hdev, gate, event, skb); + if (r <= 0) + goto exit_noskb; + } + switch (event) { case NFC_HCI_EVT_TARGET_DISCOVERED: if (skb->len < 1) { /* no status data? */ @@ -320,17 +328,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, r = nfc_hci_target_discovered(hdev, gate); break; default: - if (hdev->ops->event_received) { - hdev->ops->event_received(hdev, gate, event, skb); - return; - } - + pr_info("Discarded unknown event %x to gate %x\n", event, gate); + r = -EINVAL; break; } exit: kfree_skb(skb); +exit_noskb: if (r) { /* TODO: There was an error dispatching the event, * how to propagate up to nfc core? @@ -669,8 +675,10 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) if (hdev->ops->tm_send) return hdev->ops->tm_send(hdev, skb); - else - return -ENOTSUPP; + + kfree_skb(skb); + + return -ENOTSUPP; } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -787,7 +795,9 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, + unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -813,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { @@ -830,6 +840,8 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + hdev->quirks = quirks; + return hdev; } EXPORT_SYMBOL(nfc_hci_allocate_device); @@ -868,6 +880,28 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) { struct hci_msg *msg, *n; + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg) { + if (hdev->cmd_pending_msg->cb) + hdev->cmd_pending_msg->cb( + hdev->cmd_pending_msg->cb_context, + NULL, -ESHUTDOWN); + kfree(hdev->cmd_pending_msg); + hdev->cmd_pending_msg = NULL; + } + + hdev->shutting_down = true; + + mutex_unlock(&hdev->msg_tx_mutex); + + del_timer_sync(&hdev->cmd_timer); + cancel_work_sync(&hdev->msg_tx_work); + + cancel_work_sync(&hdev->msg_rx_work); + + nfc_unregister_device(hdev->ndev); + skb_queue_purge(&hdev->rx_hcp_frags); skb_queue_purge(&hdev->msg_rx_queue); @@ -876,13 +910,6 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) skb_queue_purge(&msg->msg_frags); kfree(msg); } - - del_timer_sync(&hdev->cmd_timer); - - nfc_unregister_device(hdev->ndev); - - cancel_work_sync(&hdev->msg_tx_work); - cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index bc308a7ca609..b6b4109f2343 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -105,6 +105,13 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, } mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->shutting_down) { + err = -ESHUTDOWN; + mutex_unlock(&hdev->msg_tx_mutex); + goto out_skb_err; + } + list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index df24be48d4da..c6bc3bd95052 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -304,6 +304,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index ec43914c92a9..85bc75c38dea 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -54,7 +54,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); if (local == NULL) return; @@ -668,6 +667,8 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); @@ -781,9 +782,15 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, /* There is no sequence with UI frames */ skb_pull(skb, LLCP_HEADER_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * UI frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); + kfree_skb(skb); } nfc_llcp_sock_put(llcp_sock); @@ -976,9 +983,15 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, pr_err("Received out of sequence I PDU\n"); skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * I frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); + kfree_skb(skb); } } @@ -1245,6 +1258,8 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); switch (ptype) { @@ -1296,6 +1311,13 @@ static void nfc_llcp_rx_work(struct work_struct *work) local->rx_pending = NULL; } +static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) +{ + local->rx_pending = skb; + del_timer(&local->link_timer); + schedule_work(&local->rx_work); +} + void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; @@ -1306,9 +1328,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); } int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) @@ -1319,9 +1339,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) if (local == NULL) return -ENODEV; - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); return 0; } diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 0d62366f8cc3..0eae5c509504 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -121,7 +121,6 @@ struct nfc_llcp_sock { struct sk_buff_head tx_queue; struct sk_buff_head tx_pending_queue; - struct sk_buff_head tx_backlog_queue; struct list_head accept_queue; struct sock *parent; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index fea22eb41b82..5332751943a9 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -672,25 +672,27 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; } + sock_recv_timestamp(msg, sk, skb); + if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); - struct sockaddr_nfc_llcp sockaddr; + struct sockaddr_nfc_llcp *sockaddr = + (struct sockaddr_nfc_llcp *) msg->msg_name; - pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); - sockaddr.sa_family = AF_NFC; - sockaddr.nfc_protocol = NFC_PROTO_NFC_DEP; - sockaddr.dsap = ui_cb->dsap; - sockaddr.ssap = ui_cb->ssap; + pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); - memcpy(msg->msg_name, &sockaddr, sizeof(sockaddr)); - msg->msg_namelen = sizeof(sockaddr); + sockaddr->sa_family = AF_NFC; + sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; + sockaddr->dsap = ui_cb->dsap; + sockaddr->ssap = ui_cb->ssap; } /* Mark read part of skb as used */ @@ -806,7 +808,6 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) llcp_sock->reserved_ssap = LLCP_SAP_MAX; skb_queue_head_init(&llcp_sock->tx_queue); skb_queue_head_init(&llcp_sock->tx_pending_queue); - skb_queue_head_init(&llcp_sock->tx_backlog_queue); INIT_LIST_HEAD(&llcp_sock->accept_queue); if (sock != NULL) @@ -821,7 +822,6 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); list_del_init(&sock->accept_queue); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf039..48ada0ec749e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786d..504b883439f1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8c13a965459..9dc537df46c4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index a9327e2e48ce..670cbc3518de 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -35,10 +35,11 @@ /* Must be called with rcu_read_lock. */ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) { - if (unlikely(!vport)) { - kfree_skb(skb); - return; - } + if (unlikely(!vport)) + goto error; + + if (unlikely(skb_warn_if_lro(skb))) + goto error; /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). @@ -50,6 +51,10 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_vport_receive(vport, skb); + return; + +error: + kfree_skb(skb); } /* Called with rcu_read_lock and bottom-halves disabled. */ @@ -169,9 +174,6 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) goto error; } - if (unlikely(skb_warn_if_lro(skb))) - goto error; - skb->dev = netdev_vport->dev; len = skb->len; dev_queue_xmit(skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e639645e8fec..c111bd0e083a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2361,13 +2361,15 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req_u, 0, sizeof(req_u)); - - if (po->rx_ring.pg_vec) + if (po->rx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); + } - if (po->tx_ring.pg_vec) + if (po->tx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); + } fanout_release(sk); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8dbd695c160b..823463adbd21 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,23 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) -#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_mtu_ptoks; + s64 tcfp_t_c; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; +}; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -108,10 +123,6 @@ static void tcf_police_destroy(struct tcf_police *p) write_unlock_bh(&police_lock); gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); - if (p->tcfp_R_tab) - qdisc_put_rtab(p->tcfp_R_tab); - if (p->tcfp_P_tab) - qdisc_put_rtab(p->tcfp_P_tab); /* * gen_estimator est_timer() might access p->tcf_lock * or bstats, wait a RCU grace period before freeing p @@ -212,26 +223,36 @@ override: } /* No failure allowed after this point */ - if (R_tab != NULL) { - qdisc_put_rtab(police->tcfp_R_tab); - police->tcfp_R_tab = R_tab; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (R_tab) + police->tcfp_mtu = 255 << R_tab->rate.cell_log; + } + if (R_tab) { + police->rate_present = true; + psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + qdisc_put_rtab(R_tab); + } else { + police->rate_present = false; } - if (P_tab != NULL) { - qdisc_put_rtab(police->tcfp_P_tab); - police->tcfp_P_tab = P_tab; + if (P_tab) { + police->peak_present = true; + psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + qdisc_put_rtab(P_tab); + } else { + police->peak_present = false; } if (tb[TCA_POLICE_RESULT]) police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; + police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); + police->tcfp_toks = police->tcfp_burst; + if (police->peak_present) { + police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, + police->tcfp_mtu); + police->tcfp_ptoks = police->tcfp_mtu_ptoks; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; if (tb[TCA_POLICE_AVRATE]) @@ -241,7 +262,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = psched_get_time(); + police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -287,9 +308,9 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = a->priv; - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; spin_lock(&police->tcf_lock); @@ -305,24 +326,25 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { + if (!police->rate_present) { spin_unlock(&police->tcf_lock); return police->tcfp_result; } - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - police->tcfp_t_c, + police->tcfp_burst); + if (police->peak_present) { ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, qdisc_pkt_len(skb)); + if (ptoks > police->tcfp_mtu_ptoks) + ptoks = police->tcfp_mtu_ptoks; + ptoks -= (s64) psched_l2t_ns(&police->peak, + qdisc_pkt_len(skb)); } toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) + if (toks > police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, qdisc_pkt_len(skb)); + toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; @@ -348,15 +370,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .index = police->tcf_index, .action = police->tcf_action, .mtu = police->tcfp_mtu, - .burst = police->tcfp_burst, + .burst = PSCHED_NS2TICKS(police->tcfp_burst), .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (police->rate_present) + opt.rate.rate = psched_ratecfg_getrate(&police->rate); + if (police->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d84f7e734cd7..fe1ba54b93f7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -493,7 +493,7 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) @@ -502,10 +502,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) qdisc_throttled(wd->qdisc); hrtimer_start(&wd->timer, - ns_to_ktime(PSCHED_TICKS2NS(expires)), + ns_to_ktime(expires), HRTIMER_MODE_ABS); } -EXPORT_SYMBOL(qdisc_watchdog_schedule); +EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d81a4478514..ffad48109a22 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -896,3 +897,39 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } + +void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +{ + u64 factor; + u64 mult; + int shift; + + r->rate_bps = rate << 3; + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. + */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} +EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 51561eafcb72..03c2692ca01e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -38,6 +38,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> /* HTB algorithm. @@ -71,12 +72,6 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -struct htb_rate_cfg { - u64 rate_bps; - u32 mult; - u32 shift; -}; - /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -124,8 +119,8 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct htb_rate_cfg rate; - struct htb_rate_cfg ceil; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ s64 tokens, ctokens; /* current number of tokens */ @@ -168,45 +163,6 @@ struct htb_sched { struct work_struct work; }; -static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) -{ - return ((u64)len * r->mult) >> r->shift; -} - -static void htb_precompute_ratedata(struct htb_rate_cfg *r) -{ - u64 factor; - u64 mult; - int shift; - - r->shift = 0; - r->mult = 1; - /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. - */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) - break; - } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); - } -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -632,7 +588,7 @@ static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->buffer) toks = cl->buffer; - toks -= (s64) l2t_ns(&cl->rate, bytes); + toks -= (s64) psched_l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -645,7 +601,7 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (s64) l2t_ns(&cl->ceil, bytes); + toks -= (s64) psched_l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -1134,10 +1090,10 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = cl->rate.rate_bps >> 3; - opt.buffer = cl->buffer; - opt.ceil.rate = cl->ceil.rate_bps >> 3; - opt.cbuffer = cl->cbuffer; + opt.rate.rate = psched_ratecfg_getrate(&cl->rate); + opt.buffer = PSCHED_NS2TICKS(cl->buffer); + opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); + opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; opt.level = cl->level; @@ -1459,8 +1415,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ - cl->tokens = hopt->buffer; - cl->ctokens = hopt->cbuffer; + cl->tokens = PSCHED_TICKS2NS(hopt->buffer); + cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ cl->t_c = psched_get_time(); cl->cmode = HTB_CAN_SEND; @@ -1503,17 +1459,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - cl->buffer = hopt->buffer; - cl->cbuffer = hopt->cbuffer; - - cl->rate.rate_bps = (u64)hopt->rate.rate << 3; - cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; - - htb_precompute_ratedata(&cl->rate); - htb_precompute_ratedata(&cl->ceil); + psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); + psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); - cl->buffer = hopt->buffer << PSCHED_SHIFT; - cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + cl->buffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); sch_tree_unlock(sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 298c0ddfb57e..3d2acc7a9c80 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -438,18 +438,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->rate) { struct sk_buff_head *list = &sch->q; - delay += packet_len_2_sched_time(skb->len, q); - if (!skb_queue_empty(list)) { /* - * Last packet in queue is reference point (now). - * First packet in queue is already in flight, - * calculate this time bonus and substract + * Last packet in queue is reference point (now), + * calculate this time bonus and subtract * from delay. */ - delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; + delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay = max_t(psched_tdiff_t, 0, delay); now = netem_skb_cb(skb_peek_tail(list))->time_to_send; } + + delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 4b056c15e90c..c8388f3c3426 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> @@ -100,23 +101,21 @@ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ - u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ - u32 mtu; + s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ + s64 mtu; u32 max_size; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + bool peak_present; /* Variables */ - long tokens; /* Current number of B tokens */ - long ptokens; /* Current number of P tokens */ - psched_time_t t_c; /* Time check-point */ + s64 tokens; /* Current number of B tokens */ + s64 ptokens; /* Current number of P tokens */ + s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q, L) qdisc_l2t((q)->R_tab, L) -#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) - static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) skb = q->qdisc->ops->peek(q->qdisc); if (skb) { - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = psched_get_time(); - toks = psched_tdiff_bounded(now, q->t_c, q->buffer); + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - q->t_c, q->buffer); - if (q->P_tab) { + if (q->peak_present) { ptoks = toks + q->ptokens; - if (ptoks > (long)q->mtu) + if (ptoks > q->mtu) ptoks = q->mtu; - ptoks -= L2T_P(q, len); + ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; - if (toks > (long)q->buffer) + if (toks > q->buffer) toks = q->buffer; - toks -= L2T(q, len); + toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); @@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) return skb; } - qdisc_watchdog_schedule(&q->watchdog, - now + max_t(long, -toks, -ptoks)); + qdisc_watchdog_schedule_ns(&q->watchdog, + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = qopt->mtu; + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = qopt->buffer; + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; - swap(q->R_tab, rtab); - swap(q->P_tab, ptab); + psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + if (ptab) { + psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + q->peak_present = true; + } else { + q->peak_present = false; + } sch_tree_unlock(sch); err = 0; @@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; @@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - - if (q->P_tab) - qdisc_put_rtab(q->P_tab); - if (q->R_tab) - qdisc_put_rtab(q->R_tab); - qdisc_destroy(q->qdisc); } @@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate = q->R_tab->rate; - if (q->P_tab) - opt.peakrate = q->P_tab->rate; + opt.rate.rate = psched_ratecfg_getrate(&q->rate); + if (q->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - opt.mtu = q->mtu; - opt.buffer = q->buffer; + opt.mtu = PSCHED_NS2TICKS(q->mtu); + opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b45ed1f96921..2f95f5a5145d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -434,8 +434,7 @@ void sctp_association_free(struct sctp_association *asoc) * on our state. */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { - if (timer_pending(&asoc->timers[i]) && - del_timer(&asoc->timers[i])) + if (del_timer(&asoc->timers[i])) sctp_association_put(asoc); } @@ -1497,7 +1496,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) /* Stop the SACK timer. */ timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 159b9bc5d633..ba1dfc3f8def 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -71,7 +71,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key) return; if (atomic_dec_and_test(&key->refcnt)) { - kfree(key); + kzfree(key); SCTP_DBG_OBJCNT_DEC(keys); } } @@ -200,27 +200,28 @@ static struct sctp_auth_bytes *sctp_auth_make_key_vector( struct sctp_auth_bytes *new; __u32 len; __u32 offset = 0; + __u16 random_len, hmacs_len, chunks_len = 0; - len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); - if (chunks) - len += ntohs(chunks->param_hdr.length); + random_len = ntohs(random->param_hdr.length); + hmacs_len = ntohs(hmacs->param_hdr.length); + if (chunks) + chunks_len = ntohs(chunks->param_hdr.length); - new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + len = random_len + hmacs_len + chunks_len; + + new = sctp_auth_create_key(len, gfp); if (!new) return NULL; - new->len = len; - - memcpy(new->data, random, ntohs(random->param_hdr.length)); - offset += ntohs(random->param_hdr.length); + memcpy(new->data, random, random_len); + offset += random_len; if (chunks) { - memcpy(new->data + offset, chunks, - ntohs(chunks->param_hdr.length)); - offset += ntohs(chunks->param_hdr.length); + memcpy(new->data + offset, chunks, chunks_len); + offset += chunks_len; } - memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + memcpy(new->data + offset, hmacs, hmacs_len); return new; } @@ -350,8 +351,8 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, gfp); out: - kfree(local_key_vector); - kfree(peer_key_vector); + sctp_auth_key_put(local_key_vector); + sctp_auth_key_put(peer_key_vector); return secret; } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 17a001bac2cc..73aad3d16a45 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -151,9 +151,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ - get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); - ep->last_key = ep->current_key = 0; - ep->key_changed_at = jiffies; + get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); @@ -271,6 +269,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); + memset(ep->secret_key, 0, sizeof(ep->secret_key)); + /* Remove and free the port */ if (sctp_sk(ep->base.sk)->bind_hash) sctp_put_port(ep->base.sk); diff --git a/net/sctp/input.c b/net/sctp/input.c index 8bd3c279427e..965bbbbe48d4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -468,8 +468,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } else { struct net *net = sock_net(sk); - if (timer_pending(&t->proto_unreach_timer) && - del_timer(&t->proto_unreach_timer)) + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); sctp_do_sm(net, SCTP_EVENT_T_OTHER, diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 379c81dee9d1..01dca753db16 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -224,7 +224,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) /* Free the outqueue structure and any related pending chunks. */ -void sctp_outq_teardown(struct sctp_outq *q) +static void __sctp_outq_teardown(struct sctp_outq *q) { struct sctp_transport *transport; struct list_head *lchunk, *temp; @@ -277,8 +277,6 @@ void sctp_outq_teardown(struct sctp_outq *q) sctp_chunk_free(chunk); } - q->error = 0; - /* Throw away any leftover control chunks. */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { list_del_init(&chunk->list); @@ -286,11 +284,17 @@ void sctp_outq_teardown(struct sctp_outq *q) } } +void sctp_outq_teardown(struct sctp_outq *q) +{ + __sctp_outq_teardown(q); + sctp_outq_init(q->asoc, q); +} + /* Free the outqueue structure and any related pending chunks. */ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ - sctp_outq_teardown(q); + __sctp_outq_teardown(q); /* If we were kmalloc()'d, free the memory. */ if (q->malloced) @@ -1696,10 +1700,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, * address. */ if (!transport->flight_size) { - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) { + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); - } } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 5f7518de2fd1..261b7b9858a4 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -122,12 +122,12 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) { struct sctp_transport *sp; static __u32 lcwnd = 0; @@ -183,6 +183,13 @@ static __init int sctpprobe_init(void) { int ret = -ENOMEM; + /* Warning: if the function signature of sctp_sf_eat_sack_6_2, + * has been changed, you also have to change the signature of + * jsctp_sf_eat_sack, otherwise you end up right here! + */ + BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, + jsctp_sf_eat_sack) == 0); + init_waitqueue_head(&sctpw.wait); spin_lock_init(&sctpw.lock); if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e1c5fc2be6b8..a193f3bc8144 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1589,8 +1589,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct sctp_signed_cookie *cookie; struct scatterlist sg; int headersize, bodysize; - unsigned int keylen; - char *key; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1650,12 +1648,11 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Sign the message. */ sg_init_one(&sg, &cookie->c, bodysize); - keylen = SCTP_SECRET_SIZE; - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) goto free_cookie; } @@ -1682,8 +1679,7 @@ struct sctp_association *sctp_unpack_cookie( int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; struct scatterlist sg; - unsigned int keylen, len; - char *key; + unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; @@ -1718,34 +1714,21 @@ struct sctp_association *sctp_unpack_cookie( goto no_hmac; /* Check the signature. */ - keylen = SCTP_SECRET_SIZE; sg_init_one(&sg, bear_cookie, bodysize); - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, digest)) { *error = -SCTP_IERROR_NOMEM; goto fail; } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Try the previous key. */ - key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || - crypto_hash_digest(&desc, &sg, bodysize, digest)) { - *error = -SCTP_IERROR_NOMEM; - goto fail; - } - - if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Yikes! Still bad signature! */ - *error = -SCTP_IERROR_BAD_SIG; - goto fail; - } + *error = -SCTP_IERROR_BAD_SIG; + goto fail; } no_hmac: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c9577754a708..8aab894aeabe 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -674,10 +674,8 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (timer_pending(&t->T3_rtx_timer) && - del_timer(&t->T3_rtx_timer)) { + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } } } @@ -1517,7 +1515,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 618ec7e216ca..5131fcfedb03 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1779,8 +1779,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return SCTP_DISPOSITION_CONSUME; nomem_ev: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9e65758cb038..cedd9bf67b8c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3390,7 +3390,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); out: - kfree(authkey); + kzfree(authkey); return ret; } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 043889ac86c0..bf3c6e8fc401 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -366,7 +366,11 @@ int sctp_sysctl_net_register(struct net *net) void sctp_sysctl_net_unregister(struct net *net) { + struct ctl_table *table; + + table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); + kfree(table); } static struct ctl_table_header * sctp_sysctl_header; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4e45bb68aef0..fafd2a461ba0 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -151,13 +151,11 @@ void sctp_transport_free(struct sctp_transport *transport) * structure hang around in memory since we know * the tranport is going away. */ - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ - if (timer_pending(&transport->proto_unreach_timer) && - del_timer(&transport->proto_unreach_timer)) + if (del_timer(&transport->proto_unreach_timer)) sctp_association_put(transport->asoc); sctp_transport_put(transport); @@ -168,10 +166,6 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); - if (transport->asoc) - sctp_association_put(transport->asoc); - - sctp_packet_free(&transport->packet); dst_release(transport->dst); kfree(transport); @@ -186,6 +180,11 @@ static void sctp_transport_destroy(struct sctp_transport *transport) SCTP_ASSERT(transport->dead, "Transport is not dead", return); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); + + sctp_packet_free(&transport->packet); + + if (transport->asoc) + sctp_association_put(transport->asoc); } /* Start T3_rtx timer if it is not already running and update the heartbeat @@ -654,10 +653,9 @@ void sctp_transport_reset(struct sctp_transport *t) void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ - if (timer_pending(&t->T3_rtx_timer)) { - (void)del_timer(&t->T3_rtx_timer); + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } + sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) diff --git a/net/socket.c b/net/socket.c index 2ca51c719ef9..ee0d029e5130 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,7 +69,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> @@ -2838,7 +2837,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) } ifr = compat_alloc_user_space(buf_size); - rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; @@ -2862,12 +2861,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_ext + 1) - - (void *)rxnfc) || + (void __user *)(&rxnfc->fs.m_ext + 1) - + (void __user *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, - (void *)(&rxnfc->fs.location + 1) - - (void *)&rxnfc->fs.ring_cookie) || + (void __user *)(&rxnfc->fs.location + 1) - + (void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; @@ -2879,12 +2878,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_ext + 1) - - (const void *)rxnfc) || + (const void __user *)(&rxnfc->fs.m_ext + 1) - + (const void __user *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, - (const void *)(&rxnfc->fs.location + 1) - - (const void *)&rxnfc->fs.ring_cookie) || + (const void __user *)(&rxnfc->fs.location + 1) - + (const void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index bfa31714581f..fb20f25ddec9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -98,9 +98,25 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) +{ + struct list_head *q = &queue->tasks[queue->priority]; + struct rpc_task *task; + + if (!list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + if (task->tk_owner == queue->owner) + list_move_tail(&task->u.tk_wait.list, q); + } +} + static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { - queue->priority = priority; + if (queue->priority != priority) { + /* Fairness: rotate the list when changing priority */ + rpc_rotate_queue_owner(queue); + queue->priority = priority; + } } static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0a148c9d2a5c..0f679df7d072 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -465,7 +465,7 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, } /* - * See net/ipv6/datagram.c : datagram_recv_ctl + * See net/ipv6/datagram.c : ip6_datagram_recv_ctl */ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, struct cmsghdr *cmh) diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..54bb7bdf92d3 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2015 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include <linux/types.h> + +#define EXPORT_SYMTAB +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" +#include "vsock_version.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. + */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/vm_sockets.h> + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outsided of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. */ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. */ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..e8a87cf37072 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2157 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> + +#define EXPORT_SYMTAB +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? -err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. + */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. + */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. + */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will handle + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). + */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. + */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. (%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..1bf991803ec0 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. + */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. + */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. */ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. + */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include <linux/types.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/vm_sockets.h> + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..b7df1aea7c59 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..cdfbcefdf843 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include <linux/vm_sockets.h> + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h new file mode 100644 index 000000000000..4df7f5e2151c --- /dev/null +++ b/net/vmw_vsock/vsock_version.h @@ -0,0 +1,22 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2011-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_VERSION_H_ +#define _VSOCK_VERSION_H_ + +#define VSOCK_DRIVER_VERSION_PARTS { 1, 0, 0, 0 } +#define VSOCK_DRIVER_VERSION_STRING "1.0.0.0-k" + +#endif /* _VSOCK_VERSION_H_ */ diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. - With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from <ftp://ftp.sangoma.com/>. - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/capability.h> -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> /* support for loadable modules */ -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ - -#include <asm/byteorder.h> /* htons(), etc. */ -#include <linux/wanrouter.h> /* WAN router API definitions */ - -#include <linux/vmalloc.h> /* vmalloc, vfree */ -#include <asm/uaccess.h> /* copy_to/from_user */ -#include <linux/init.h> /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. - * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? */ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. - * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include <linux/init.h> /* __initfunc et al. */ -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/wanrouter.h> /* WAN router API definitions */ -#include <linux/seq_file.h> -#include <linux/mutex.h> - -#include <net/net_namespace.h> -#include <asm/io.h> - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * <device> entry for each WAN device - */ - -/* - * Generic /proc/net/router/<file> file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? "internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 324e8d851dc4..a4a14e8f55cc 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -46,3 +46,65 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } + +void cfg80211_ch_switch_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ch_switch_notify(dev, chandef); + + wdev_lock(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + goto out; + + wdev->channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: + wdev_unlock(wdev); + return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_spurious_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); + +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a7990bb16529..396373f3ec26 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -76,6 +76,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; if (!chandef->center_freq2) return false; + /* adjacent is not allowed -- that's a 160 MHz channel */ + if (chandef->center_freq1 - chandef->center_freq2 == 80 || + chandef->center_freq2 - chandef->center_freq1 == 80) + return false; break; case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 != control_freq + 30 && diff --git a/net/wireless/core.c b/net/wireless/core.c index b677eab55b68..40dbe37cfbf6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -57,9 +57,6 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -74,10 +71,8 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev; - if (!wiphy) - return WIPHY_IDX_STALE; - rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + return rdev->wiphy_idx; } @@ -86,9 +81,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); @@ -309,7 +301,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy_idx = wiphy_counter++; - if (unlikely(!wiphy_idx_valid(rdev->wiphy_idx))) { + if (unlikely(rdev->wiphy_idx < 0)) { wiphy_counter--; mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ @@ -390,8 +382,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -406,6 +401,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. */ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; @@ -478,6 +478,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); diff --git a/net/wireless/core.h b/net/wireless/core.h index 3563097169cb..8396f7671c8d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -18,6 +18,9 @@ #include <net/cfg80211.h> #include "reg.h" + +#define WIPHY_IDX_INVALID -1 + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -86,7 +89,7 @@ struct cfg80211_registered_device { /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ - struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); + struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline @@ -96,13 +99,6 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) return container_of(wiphy, struct cfg80211_registered_device, wiphy); } -/* Note 0 is valid, hence phy0 */ -static inline -bool wiphy_idx_valid(int wiphy_idx) -{ - return wiphy_idx >= 0; -} - static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { @@ -126,12 +122,6 @@ static inline void assert_cfg80211_lock(void) lockdep_assert_held(&cfg80211_mutex); } -/* - * You can use this to mark a wiphy_idx as not having an associated wiphy. - * It guarantees cfg80211_rdev_by_wiphy_idx(wiphy_idx) will return NULL - */ -#define WIPHY_IDX_STALE -1 - struct cfg80211_internal_bss { struct list_head list; struct rb_node rbn; @@ -435,7 +425,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -443,7 +434,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -460,7 +451,7 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); } void diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index f9d6ce5cfabb..55957a284f6c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -44,6 +44,10 @@ #define MESH_SYNC_NEIGHBOR_OFFSET_MAX 50 +#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ +#define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ + const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, .dot11MeshConfirmTimeout = MESH_CONF_T, @@ -69,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { @@ -79,6 +85,8 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, + .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5e8123ee63fd..461e692cdfec 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -987,65 +987,3 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); - -void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ch_switch_notify(dev, chandef); - - wdev_lock(wdev); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) - goto out; - - wdev->channel = chandef->chan; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; -} -EXPORT_SYMBOL(cfg80211_ch_switch_notify); - -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; - - trace_cfg80211_rx_spurious_frame(dev, addr); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; -} -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); - -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; - - trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; -} -EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f45706adaf34..b5978ab4ad7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -365,6 +365,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -856,6 +858,9 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -1265,6 +1270,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2079,6 +2090,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; + if (type == NL80211_IFTYPE_P2P_DEVICE && info->attrs[NL80211_ATTR_MAC]) { + nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], + ETH_ALEN); + if (!is_valid_ether_addr(params.macaddr)) + return -EADDRNOTAVAIL; + } + if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -2481,6 +2499,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. + */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2724,6 +2833,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2732,6 +2847,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -3001,6 +3119,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3188,13 +3318,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } - if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - - if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) - params.ht_capa = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL] || + info->attrs[NL80211_ATTR_HT_CAPABILITY]) + return -EINVAL; if (!rdev->ops->change_station) return -EOPNOTSUPP; @@ -3210,6 +3336,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3217,6 +3354,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3231,11 +3370,25 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + + /* reject other things that can't change */ + if (params.supported_rates) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3255,9 +3408,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; - if (params.ht_capa) - return -EINVAL; - if (params.listen_interval >= 0) + if (params.local_pm) return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) @@ -3267,9 +3418,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things mesh doesn't support */ if (params.vlan) return -EINVAL; - if (params.ht_capa) - return -EINVAL; - if (params.listen_interval >= 0) + if (params.supported_rates) return -EINVAL; /* * No special handling for TDLS here -- the userspace @@ -3393,17 +3542,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; @@ -3787,12 +3950,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) * window between nl80211_init() and regulatory_init(), if that is * even possible. */ - mutex_lock(&cfg80211_mutex); - if (unlikely(!cfg80211_regdomain)) { - mutex_unlock(&cfg80211_mutex); + if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - } - mutex_unlock(&cfg80211_mutex); if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; @@ -3908,7 +4067,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3947,6 +4110,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3967,13 +4132,15 @@ static int nl80211_parse_mesh_config(struct genl_info *info, struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; -#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ -do {\ - if (table[attr_num]) {\ - cfg->param = nla_fn(table[attr_num]); \ - mask |= (1 << (attr_num - 1)); \ - } \ -} while (0);\ +#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ +do { \ + if (tb[attr]) { \ + if (fn(tb[attr]) < min || fn(tb[attr]) > max) \ + return -EINVAL; \ + cfg->param = fn(tb[attr]); \ + mask |= (1 << (attr - 1)); \ + } \ +} while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) @@ -3988,83 +4155,98 @@ do {\ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255, mask, NL80211_MESHCONF_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, + 1, 255, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + 1, 65535, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, mask, + dot11MeshHWMPnetDiameterTraversalTime, + 1, 65535, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4, + mask, NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535, + mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + dot11MeshGateAnnouncementProtocol, 0, 1, + mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, - mask, + 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPconfirmationInterval, mask, + dot11MeshHWMPconfirmationInterval, + 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; @@ -4152,6 +4334,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) { + const struct ieee80211_regdomain *regdom; struct sk_buff *msg; void *hdr = NULL; struct nlattr *nl_reg_rules; @@ -4174,35 +4357,36 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto put_failure; - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, - cfg80211_regdomain->alpha2) || - (cfg80211_regdomain->dfs_region && - nla_put_u8(msg, NL80211_ATTR_DFS_REGION, - cfg80211_regdomain->dfs_region))) - goto nla_put_failure; - if (reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure; + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + + if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || + (regdom->dfs_region && + nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) + goto nla_put_failure_rcu; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure; + goto nla_put_failure_rcu; - for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; const struct ieee80211_reg_rule *reg_rule; const struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule; - reg_rule = &cfg80211_regdomain->reg_rules[i]; + reg_rule = ®dom->reg_rules[i]; freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS, reg_rule->flags) || @@ -4216,10 +4400,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, power_rule->max_eirp)) - goto nla_put_failure; + goto nla_put_failure_rcu; nla_nest_end(msg, nl_reg_rule); } + rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); @@ -4227,6 +4412,8 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) err = genlmsg_reply(msg, info); goto out; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: @@ -4259,27 +4446,18 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { num_rules++; if (num_rules > NL80211_MAX_SUPP_REG_RULES) return -EINVAL; } - mutex_lock(&cfg80211_mutex); - - if (!reg_is_valid_request(alpha2)) { - r = -EINVAL; - goto bad_reg; - } - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) { - r = -ENOMEM; - goto bad_reg; - } + if (!rd) + return -ENOMEM; rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; @@ -4293,10 +4471,10 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) rd->dfs_region = dfs_region; nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); if (r) goto bad_reg; @@ -4309,16 +4487,14 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - BUG_ON(rule_idx != num_rules); + mutex_lock(&cfg80211_mutex); r = set_regdom(rd); - + /* set_regdom took ownership */ + rd = NULL; mutex_unlock(&cfg80211_mutex); - return r; - bad_reg: - mutex_unlock(&cfg80211_mutex); kfree(rd); return r; } @@ -5867,6 +6043,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, @@ -6652,6 +6837,21 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) return -EINVAL; + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + setup.beacon_interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + if (setup.beacon_interval < 10 || + setup.beacon_interval > 10000) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { + setup.dtim_period = + nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); + if (setup.dtim_period < 1 || setup.dtim_period > 100) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); @@ -7784,6 +7984,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -8051,7 +8259,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (wiphy_idx_valid(request->wiphy_idx) && + if (request->wiphy_idx != WIPHY_IDX_INVALID && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f837..422d38291d66 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 82c4fc7c994c..de02d633c212 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -48,7 +48,6 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/random.h> #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> @@ -66,6 +65,13 @@ #define REG_DBG_PRINT(args...) #endif +enum reg_request_treatment { + REG_REQ_OK, + REG_REQ_IGNORE, + REG_REQ_INTERSECT, + REG_REQ_ALREADY_SET, +}; + static struct regulatory_request core_request_world = { .initiator = NL80211_REGDOM_SET_BY_CORE, .alpha2[0] = '0', @@ -76,7 +82,8 @@ static struct regulatory_request core_request_world = { }; /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request = &core_request_world; +static struct regulatory_request __rcu *last_request = + (void __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -88,16 +95,16 @@ static struct device_type reg_device_type = { /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 + * information to give us an alpha2. */ -const struct ieee80211_regdomain *cfg80211_regdomain; +const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* * Protects static reg.c components: - * - cfg80211_world_regdom - * - cfg80211_regdom - * - last_request - * - reg_num_devs_support_basehint + * - cfg80211_regdomain (if not used with RCU) + * - cfg80211_world_regdom + * - last_request (if not used with RCU) + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); @@ -112,6 +119,31 @@ static inline void assert_reg_lock(void) lockdep_assert_held(®_mutex); } +static const struct ieee80211_regdomain *get_cfg80211_regdom(void) +{ + return rcu_dereference_protected(cfg80211_regdomain, + lockdep_is_held(®_mutex)); +} + +static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +{ + return rcu_dereference_protected(wiphy->regd, + lockdep_is_held(®_mutex)); +} + +static void rcu_free_regdom(const struct ieee80211_regdomain *r) +{ + if (!r) + return; + kfree_rcu((struct ieee80211_regdomain *)r, rcu_head); +} + +static struct regulatory_request *get_last_request(void) +{ + return rcu_dereference_check(last_request, + lockdep_is_held(®_mutex)); +} + /* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); static spinlock_t reg_requests_lock; @@ -177,28 +209,37 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(bool full_reset) +static void reset_regdomains(bool full_reset, + const struct ieee80211_regdomain *new_regdom) { + const struct ieee80211_regdomain *r; + struct regulatory_request *lr; + + assert_reg_lock(); + + r = get_cfg80211_regdom(); + /* avoid freeing static information or freeing something twice */ - if (cfg80211_regdomain == cfg80211_world_regdom) - cfg80211_regdomain = NULL; + if (r == cfg80211_world_regdom) + r = NULL; if (cfg80211_world_regdom == &world_regdom) cfg80211_world_regdom = NULL; - if (cfg80211_regdomain == &world_regdom) - cfg80211_regdomain = NULL; + if (r == &world_regdom) + r = NULL; - kfree(cfg80211_regdomain); - kfree(cfg80211_world_regdom); + rcu_free_regdom(r); + rcu_free_regdom(cfg80211_world_regdom); cfg80211_world_regdom = &world_regdom; - cfg80211_regdomain = NULL; + rcu_assign_pointer(cfg80211_regdomain, new_regdom); if (!full_reset) return; - if (last_request != &core_request_world) - kfree(last_request); - last_request = &core_request_world; + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); + rcu_assign_pointer(last_request, &core_request_world); } /* @@ -207,30 +248,29 @@ static void reset_regdomains(bool full_reset) */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { - BUG_ON(!last_request); + struct regulatory_request *lr; + + lr = get_last_request(); + + WARN_ON(!lr); - reset_regdomains(false); + reset_regdomains(false, rd); cfg80211_world_regdom = rd; - cfg80211_regdomain = rd; } bool is_world_regdom(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] == '0' && alpha2[1] == '0') - return true; - return false; + return alpha2[0] == '0' && alpha2[1] == '0'; } static bool is_alpha2_set(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] != 0 && alpha2[1] != 0) - return true; - return false; + return alpha2[0] && alpha2[1]; } static bool is_unknown_alpha2(const char *alpha2) @@ -241,9 +281,7 @@ static bool is_unknown_alpha2(const char *alpha2) * Special case where regulatory domain was built by driver * but a specific alpha2 cannot be determined */ - if (alpha2[0] == '9' && alpha2[1] == '9') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '9'; } static bool is_intersected_alpha2(const char *alpha2) @@ -255,39 +293,30 @@ static bool is_intersected_alpha2(const char *alpha2) * result of an intersection between two regulatory domain * structures */ - if (alpha2[0] == '9' && alpha2[1] == '8') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '8'; } static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - if (isalpha(alpha2[0]) && isalpha(alpha2[1])) - return true; - return false; + return isalpha(alpha2[0]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) { if (!alpha2_x || !alpha2_y) return false; - if (alpha2_x[0] == alpha2_y[0] && - alpha2_x[1] == alpha2_y[1]) - return true; - return false; + return alpha2_x[0] == alpha2_y[0] && alpha2_x[1] == alpha2_y[1]; } static bool regdom_changes(const char *alpha2) { - assert_cfg80211_lock(); + const struct ieee80211_regdomain *r = get_cfg80211_regdom(); - if (!cfg80211_regdomain) + if (!r) return true; - if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) - return false; - return true; + return !alpha2_equal(r->alpha2, alpha2); } /* @@ -301,38 +330,36 @@ static bool is_user_regdom_saved(void) return false; /* This would indicate a mistake on the design */ - if (WARN((!is_world_regdom(user_alpha2) && - !is_an_alpha2(user_alpha2)), + if (WARN(!is_world_regdom(user_alpha2) && !is_an_alpha2(user_alpha2), "Unexpected user alpha2: %c%c\n", - user_alpha2[0], - user_alpha2[1])) + user_alpha2[0], user_alpha2[1])) return false; return true; } -static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, - const struct ieee80211_regdomain *src_regd) +static const struct ieee80211_regdomain * +reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd = 0; + int size_of_regd; unsigned int i; - size_of_regd = sizeof(struct ieee80211_regdomain) + - ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + size_of_regd = + sizeof(struct ieee80211_regdomain) + + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); for (i = 0; i < src_regd->n_reg_rules; i++) memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], - sizeof(struct ieee80211_reg_rule)); + sizeof(struct ieee80211_reg_rule)); - *dst_regd = regd; - return 0; + return regd; } #ifdef CONFIG_CFG80211_INTERNAL_REGDB @@ -347,9 +374,8 @@ static DEFINE_MUTEX(reg_regdb_search_mutex); static void reg_regdb_search(struct work_struct *work) { struct reg_regdb_search_request *request; - const struct ieee80211_regdomain *curdom, *regdom; - int i, r; - bool set_reg = false; + const struct ieee80211_regdomain *curdom, *regdom = NULL; + int i; mutex_lock(&cfg80211_mutex); @@ -360,14 +386,11 @@ static void reg_regdb_search(struct work_struct *work) list); list_del(&request->list); - for (i=0; i<reg_regdb_size; i++) { + for (i = 0; i < reg_regdb_size; i++) { curdom = reg_regdb[i]; - if (!memcmp(request->alpha2, curdom->alpha2, 2)) { - r = reg_copy_regd(®dom, curdom); - if (r) - break; - set_reg = true; + if (alpha2_equal(request->alpha2, curdom->alpha2)) { + regdom = reg_copy_regd(curdom); break; } } @@ -376,7 +399,7 @@ static void reg_regdb_search(struct work_struct *work) } mutex_unlock(®_regdb_search_mutex); - if (set_reg) + if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); mutex_unlock(&cfg80211_mutex); @@ -434,15 +457,14 @@ static int call_crda(const char *alpha2) return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); } -/* Used by nl80211 before kmalloc'ing our regulatory domain */ -bool reg_is_valid_request(const char *alpha2) +static bool reg_is_valid_request(const char *alpha2) { - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); - if (!last_request) + if (!lr || lr->processed) return false; - return alpha2_equal(last_request->alpha2, alpha2); + return alpha2_equal(lr->alpha2, alpha2); } /* Sanity check on a regulatory rule */ @@ -460,7 +482,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->end_freq_khz <= freq_range->start_freq_khz || - freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -487,8 +509,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) } static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, - u32 center_freq_khz, - u32 bw_khz) + u32 center_freq_khz, u32 bw_khz) { u32 start_freq_khz, end_freq_khz; @@ -518,7 +539,7 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * regulatory rule support for other "bands". **/ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, - u32 freq_khz) + u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 /* @@ -540,10 +561,9 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ -static int reg_rules_intersect( - const struct ieee80211_reg_rule *rule1, - const struct ieee80211_reg_rule *rule2, - struct ieee80211_reg_rule *intersected_rule) +static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) { const struct ieee80211_freq_range *freq_range1, *freq_range2; struct ieee80211_freq_range *freq_range; @@ -560,11 +580,11 @@ static int reg_rules_intersect( power_rule = &intersected_rule->power_rule; freq_range->start_freq_khz = max(freq_range1->start_freq_khz, - freq_range2->start_freq_khz); + freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, - freq_range2->end_freq_khz); + freq_range2->end_freq_khz); freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, - freq_range2->max_bandwidth_khz); + freq_range2->max_bandwidth_khz); freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->max_bandwidth_khz > freq_diff) @@ -575,7 +595,7 @@ static int reg_rules_intersect( power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, power_rule2->max_antenna_gain); - intersected_rule->flags = (rule1->flags | rule2->flags); + intersected_rule->flags = rule1->flags | rule2->flags; if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -596,9 +616,9 @@ static int reg_rules_intersect( * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. */ -static struct ieee80211_regdomain *regdom_intersect( - const struct ieee80211_regdomain *rd1, - const struct ieee80211_regdomain *rd2) +static struct ieee80211_regdomain * +regdom_intersect(const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) { int r, size_of_regd; unsigned int x, y; @@ -607,12 +627,7 @@ static struct ieee80211_regdomain *regdom_intersect( struct ieee80211_reg_rule *intersected_rule; struct ieee80211_regdomain *rd; /* This is just a dummy holder to help us count */ - struct ieee80211_reg_rule irule; - - /* Uses the stack temporarily for counter arithmetic */ - intersected_rule = &irule; - - memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + struct ieee80211_reg_rule dummy_rule; if (!rd1 || !rd2) return NULL; @@ -629,11 +644,8 @@ static struct ieee80211_regdomain *regdom_intersect( rule1 = &rd1->reg_rules[x]; for (y = 0; y < rd2->n_reg_rules; y++) { rule2 = &rd2->reg_rules[y]; - if (!reg_rules_intersect(rule1, rule2, - intersected_rule)) + if (!reg_rules_intersect(rule1, rule2, &dummy_rule)) num_rules++; - memset(intersected_rule, 0, - sizeof(struct ieee80211_reg_rule)); } } @@ -641,15 +653,15 @@ static struct ieee80211_regdomain *regdom_intersect( return NULL; size_of_regd = sizeof(struct ieee80211_regdomain) + - ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); if (!rd) return NULL; - for (x = 0; x < rd1->n_reg_rules; x++) { + for (x = 0; x < rd1->n_reg_rules && rule_idx < num_rules; x++) { rule1 = &rd1->reg_rules[x]; - for (y = 0; y < rd2->n_reg_rules; y++) { + for (y = 0; y < rd2->n_reg_rules && rule_idx < num_rules; y++) { rule2 = &rd2->reg_rules[y]; /* * This time around instead of using the stack lets @@ -657,8 +669,7 @@ static struct ieee80211_regdomain *regdom_intersect( * a memcpy() */ intersected_rule = &rd->reg_rules[rule_idx]; - r = reg_rules_intersect(rule1, rule2, - intersected_rule); + r = reg_rules_intersect(rule1, rule2, intersected_rule); /* * No need to memset here the intersected rule here as * we're not using the stack anymore @@ -699,34 +710,16 @@ static u32 map_regdom_flags(u32 rd_flags) return channel_flags; } -static int freq_reg_info_regd(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule, - const struct ieee80211_regdomain *custom_regd) +static const struct ieee80211_reg_rule * +freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, + const struct ieee80211_regdomain *regd) { int i; bool band_rule_found = false; - const struct ieee80211_regdomain *regd; bool bw_fits = false; - if (!desired_bw_khz) - desired_bw_khz = MHZ_TO_KHZ(20); - - regd = custom_regd ? custom_regd : cfg80211_regdomain; - - /* - * Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further - */ - if (!custom_regd && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - last_request->initiator != NL80211_REGDOM_SET_BY_USER && - wiphy->regd) - regd = wiphy->regd; - if (!regd) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < regd->n_reg_rules; i++) { const struct ieee80211_reg_rule *rr; @@ -743,33 +736,36 @@ static int freq_reg_info_regd(struct wiphy *wiphy, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, - center_freq, - desired_bw_khz); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); - if (band_rule_found && bw_fits) { - *reg_rule = rr; - return 0; - } + if (band_rule_found && bw_fits) + return rr; } if (!band_rule_found) - return -ERANGE; + return ERR_PTR(-ERANGE); - return -EINVAL; + return ERR_PTR(-EINVAL); } -int freq_reg_info(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule) +const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, + u32 center_freq) { - assert_cfg80211_lock(); - return freq_reg_info_regd(wiphy, - center_freq, - desired_bw_khz, - reg_rule, - NULL); + const struct ieee80211_regdomain *regd; + struct regulatory_request *lr = get_last_request(); + + /* + * Follow the driver's regulatory domain, if present, unless a country + * IE has been processed or a user wants to help complaince further + */ + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->initiator != NL80211_REGDOM_SET_BY_USER && + wiphy->regd) + regd = get_wiphy_regdom(wiphy); + else + regd = get_cfg80211_regdom(); + + return freq_reg_info_regd(wiphy, center_freq, regd); } EXPORT_SYMBOL(freq_reg_info); @@ -792,7 +788,6 @@ static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) } static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { const struct ieee80211_power_rule *power_rule; @@ -807,21 +802,16 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, else snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); - REG_DBG_PRINT("Updating information on frequency %d MHz " - "for a %d MHz width channel with regulatory rule:\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", + chan->center_freq); REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", - freq_range->start_freq_khz, - freq_range->end_freq_khz, - freq_range->max_bandwidth_khz, - max_antenna_gain, + freq_range->start_freq_khz, freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, max_antenna_gain, power_rule->max_eirp); } #else static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { return; @@ -831,43 +821,25 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). To support - * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a - * new ieee80211_channel.target_bw and re run the regulatory check - * on the wiphy with the target_bw specified. Then we can simply use - * that below for the desired_bw_khz below. + * per channel, the primary and the extension channel). */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *chan) { - int r; u32 flags, bw_flags = 0; - u32 desired_bw_khz = MHZ_TO_KHZ(20); const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; struct wiphy *request_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - assert_cfg80211_lock(); - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); flags = chan->orig_flags; - r = freq_reg_info(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - ®_rule); - - if (r) { + reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); + if (IS_ERR(reg_rule)) { /* * We will disable all channels that do not match our * received regulatory rule unless the hint is coming @@ -879,7 +851,7 @@ static void handle_channel(struct wiphy *wiphy, * while 5 GHz is still supported. */ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - r == -ERANGE) + PTR_ERR(reg_rule) == -ERANGE) return; REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); @@ -887,7 +859,7 @@ static void handle_channel(struct wiphy *wiphy, return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -895,7 +867,7 @@ static void handle_channel(struct wiphy *wiphy, if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* @@ -914,8 +886,9 @@ static void handle_channel(struct wiphy *wiphy, chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); - chan->max_antenna_gain = min(chan->orig_mag, - (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_antenna_gain = + min_t(int, chan->orig_mag, + MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* @@ -935,68 +908,65 @@ static void handle_channel(struct wiphy *wiphy, } static void handle_band(struct wiphy *wiphy, - enum ieee80211_band band, - enum nl80211_reg_initiator initiator) + enum nl80211_reg_initiator initiator, + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, initiator, band, i); + handle_channel(wiphy, initiator, &sband->channels[i]); } static bool reg_request_cell_base(struct regulatory_request *request) { if (request->initiator != NL80211_REGDOM_SET_BY_USER) return false; - if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) - return false; - return true; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } bool reg_last_request_cell_base(void) { bool val; - assert_cfg80211_lock(); mutex_lock(®_mutex); - val = reg_request_cell_base(last_request); + val = reg_request_cell_base(get_last_request()); mutex_unlock(®_mutex); + return val; } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS - /* Core specific check */ -static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +static enum reg_request_treatment +reg_ignore_cell_hint(struct regulatory_request *pending_request) { + struct regulatory_request *lr = get_last_request(); + if (!reg_num_devs_support_basehint) - return -EOPNOTSUPP; + return REG_REQ_IGNORE; - if (reg_request_cell_base(last_request)) { - if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; - return 0; - } - return 0; + if (reg_request_cell_base(lr) && + !regdom_changes(pending_request->alpha2)) + return REG_REQ_ALREADY_SET; + + return REG_REQ_OK; } /* Device specific check */ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { - if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) - return true; - return false; + return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); } #else static int reg_ignore_cell_hint(struct regulatory_request *pending_request) { - return -EOPNOTSUPP; + return REG_REQ_IGNORE; } -static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) + +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { return true; } @@ -1006,18 +976,17 @@ static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) { - REG_DBG_PRINT("Ignoring regulatory request %s since " - "last_request is not set\n", + struct regulatory_request *lr = get_last_request(); + + if (!lr) { + REG_DBG_PRINT("Ignoring regulatory request %s since last_request is not set\n", reg_initiator_name(initiator)); return true; } if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver uses its own custom " - "regulatory domain\n", + REG_DBG_PRINT("Ignoring regulatory request %s since the driver uses its own custom regulatory domain\n", reg_initiator_name(initiator)); return true; } @@ -1028,22 +997,35 @@ static bool ignore_reg_update(struct wiphy *wiphy, */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - !is_world_regdom(last_request->alpha2)) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver requires its own regulatory " - "domain to be set first\n", + !is_world_regdom(lr->alpha2)) { + REG_DBG_PRINT("Ignoring regulatory request %s since the driver requires its own regulatory domain to be set first\n", reg_initiator_name(initiator)); return true; } - if (reg_request_cell_base(last_request)) + if (reg_request_cell_base(lr)) return reg_dev_ignore_cell_hint(wiphy); return false; } -static void handle_reg_beacon(struct wiphy *wiphy, - unsigned int chan_idx, +static bool reg_is_world_roaming(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *cr = get_cfg80211_regdom(); + const struct ieee80211_regdomain *wr = get_wiphy_regdom(wiphy); + struct regulatory_request *lr = get_last_request(); + + if (is_world_regdom(cr->alpha2) || (wr && is_world_regdom(wr->alpha2))) + return true; + + if (lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + return true; + + return false; +} + +static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { struct ieee80211_supported_band *sband; @@ -1051,8 +1033,6 @@ static void handle_reg_beacon(struct wiphy *wiphy, bool channel_changed = false; struct ieee80211_channel chan_before; - assert_cfg80211_lock(); - sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -1064,6 +1044,9 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (!reg_is_world_roaming(wiphy)) + return; + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; @@ -1094,8 +1077,6 @@ static void wiphy_update_new_beacon(struct wiphy *wiphy, unsigned int i; struct ieee80211_supported_band *sband; - assert_cfg80211_lock(); - if (!wiphy->bands[reg_beacon->chan.band]) return; @@ -1114,11 +1095,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) struct ieee80211_supported_band *sband; struct reg_beacon *reg_beacon; - assert_cfg80211_lock(); - - if (list_empty(®_beacon_list)) - return; - list_for_each_entry(reg_beacon, ®_beacon_list, list) { if (!wiphy->bands[reg_beacon->chan.band]) continue; @@ -1128,18 +1104,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) } } -static bool reg_is_world_roaming(struct wiphy *wiphy) -{ - if (is_world_regdom(cfg80211_regdomain->alpha2) || - (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) - return true; - if (last_request && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) - return true; - return false; -} - /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { @@ -1149,39 +1113,29 @@ static void reg_process_beacons(struct wiphy *wiphy) */ if (!last_request) return; - if (!reg_is_world_roaming(wiphy)) - return; wiphy_update_beacon_reg(wiphy); } -static bool is_ht40_not_allowed(struct ieee80211_channel *chan) +static bool is_ht40_allowed(struct ieee80211_channel *chan) { if (!chan) - return true; + return false; if (chan->flags & IEEE80211_CHAN_DISABLED) - return true; + return false; /* This would happen when regulatory rules disallow HT40 completely */ - if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40))) - return true; - return false; + if ((chan->flags & IEEE80211_CHAN_NO_HT40) == IEEE80211_CHAN_NO_HT40) + return false; + return true; } static void reg_process_ht_flags_channel(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *channel) { - struct ieee80211_supported_band *sband; - struct ieee80211_channel *channel; + struct ieee80211_supported_band *sband = wiphy->bands[channel->band]; struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; unsigned int i; - assert_cfg80211_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - channel = &sband->channels[chan_idx]; - - if (is_ht40_not_allowed(channel)) { + if (!is_ht40_allowed(channel)) { channel->flags |= IEEE80211_CHAN_NO_HT40; return; } @@ -1192,6 +1146,7 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, */ for (i = 0; i < sband->n_channels; i++) { struct ieee80211_channel *c = &sband->channels[i]; + if (c->center_freq == (channel->center_freq - 20)) channel_before = c; if (c->center_freq == (channel->center_freq + 20)) @@ -1203,28 +1158,27 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, * if that ever changes we also need to change the below logic * to include that as well. */ - if (is_ht40_not_allowed(channel_before)) + if (!is_ht40_allowed(channel_before)) channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; - if (is_ht40_not_allowed(channel_after)) + if (!is_ht40_allowed(channel_after)) channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; } static void reg_process_ht_flags_band(struct wiphy *wiphy, - enum ieee80211_band band) + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - reg_process_ht_flags_channel(wiphy, band, i); + reg_process_ht_flags_channel(wiphy, &sband->channels[i]); } static void reg_process_ht_flags(struct wiphy *wiphy) @@ -1234,34 +1188,29 @@ static void reg_process_ht_flags(struct wiphy *wiphy) if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - reg_process_ht_flags_band(wiphy, band); - } - + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; - - assert_reg_lock(); + struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) return; - last_request->dfs_region = cfg80211_regdomain->dfs_region; + lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - handle_band(wiphy, band, initiator); - } + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); + if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) @@ -1269,6 +1218,8 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; + assert_cfg80211_lock(); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); @@ -1280,47 +1231,30 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, get_last_request()); } } static void handle_channel_custom(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx, + struct ieee80211_channel *chan, const struct ieee80211_regdomain *regd) { - int r; - u32 desired_bw_khz = MHZ_TO_KHZ(20); u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - - assert_reg_lock(); - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; - - r = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - ®_rule, - regd); + reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq), + regd); - if (r) { - REG_DBG_PRINT("Disabling freq %d MHz as custom " - "regd has no rule that fits a %d MHz " - "wide channel\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + if (IS_ERR(reg_rule)) { + REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", + chan->center_freq); chan->flags = IEEE80211_CHAN_DISABLED; return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -1334,17 +1268,17 @@ static void handle_channel_custom(struct wiphy *wiphy, (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_band_custom(struct wiphy *wiphy, + struct ieee80211_supported_band *sband, const struct ieee80211_regdomain *regd) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, band, i, regd); + handle_channel_custom(wiphy, &sband->channels[i], regd); } /* Used by drivers prior to wiphy registration */ @@ -1354,60 +1288,50 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; - mutex_lock(®_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; - handle_band_custom(wiphy, band, regd); + handle_band_custom(wiphy, wiphy->bands[band], regd); bands_set++; } - mutex_unlock(®_mutex); /* * no point in calling this if it won't have any effect - * on your device's supportd bands. + * on your device's supported bands. */ WARN_ON(!bands_set); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* - * Return value which can be used by ignore_request() to indicate - * it has been determined we should intersect two regulatory domains - */ -#define REG_INTERSECT 1 - /* This has the logic which determines when a new request * should be ignored. */ -static int ignore_request(struct wiphy *wiphy, +static enum reg_request_treatment +get_reg_request_treatment(struct wiphy *wiphy, struct regulatory_request *pending_request) { struct wiphy *last_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); /* All initial requests are respected */ - if (!last_request) - return 0; + if (!lr) + return REG_REQ_OK; switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_COUNTRY_IE: - - if (reg_request_cell_base(last_request)) { + if (reg_request_cell_base(lr)) { /* Trust a Cell base station over the AP's country IE */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } - last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different @@ -1416,23 +1340,23 @@ static int ignore_request(struct wiphy *wiphy, * to be correct. Reject second one for now. */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } /* * Two consecutive Country IE hints on the same wiphy. * This should be picked up early by the driver/stack */ if (WARN_ON(regdom_changes(pending_request->alpha2))) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } return 0; case NL80211_REGDOM_SET_BY_DRIVER: - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } /* @@ -1440,59 +1364,59 @@ static int ignore_request(struct wiphy *wiphy, * back in or if you add a new device for which the previously * loaded card also agrees on the regulatory domain. */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && !regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return REG_INTERSECT; + return REG_REQ_INTERSECT; case NL80211_REGDOM_SET_BY_USER: if (reg_request_cell_base(pending_request)) return reg_ignore_cell_hint(pending_request); - if (reg_request_cell_base(last_request)) - return -EOPNOTSUPP; + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; - if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_INTERSECT; + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_INTERSECT; /* * If the user knows better the user should set the regdom * to their country before the IE is picked up */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && - last_request->intersect) - return -EOPNOTSUPP; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; /* * Process user requests only after previous user/driver/core * requests have been processed */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || - last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - if (regdom_changes(last_request->alpha2)) - return -EAGAIN; - } + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return 0; + return REG_REQ_OK; } - return -EINVAL; + return REG_REQ_IGNORE; } static void reg_set_request_processed(void) { bool need_more_processing = false; + struct regulatory_request *lr = get_last_request(); - last_request->processed = true; + lr->processed = true; spin_lock(®_requests_lock); if (!list_empty(®_requests_list)) need_more_processing = true; spin_unlock(®_requests_lock); - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) cancel_delayed_work(®_timeout); if (need_more_processing) @@ -1508,116 +1432,122 @@ static void reg_set_request_processed(void) * The Wireless subsystem can use this function to hint to the wireless core * what it believes should be the current regulatory domain. * - * Returns zero if all went fine, %-EALREADY if a regulatory domain had - * already been set or other standard error codes. + * Returns one of the different reg request treatment values. * - * Caller must hold &cfg80211_mutex and ®_mutex + * Caller must hold ®_mutex */ -static int __regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static enum reg_request_treatment +__regulatory_hint(struct wiphy *wiphy, + struct regulatory_request *pending_request) { + const struct ieee80211_regdomain *regd; bool intersect = false; - int r = 0; - - assert_cfg80211_lock(); + enum reg_request_treatment treatment; + struct regulatory_request *lr; - r = ignore_request(wiphy, pending_request); + treatment = get_reg_request_treatment(wiphy, pending_request); - if (r == REG_INTERSECT) { + switch (treatment) { + case REG_REQ_INTERSECT: if (pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return PTR_ERR(regd); } + rcu_assign_pointer(wiphy->regd, regd); } intersect = true; - } else if (r) { + break; + case REG_REQ_OK: + break; + default: /* * If the regulatory domain being requested by the * driver has already been set just copy it to the * wiphy */ - if (r == -EALREADY && - pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + if (treatment == REG_REQ_ALREADY_SET && + pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return REG_REQ_IGNORE; } - r = -EALREADY; + treatment = REG_REQ_ALREADY_SET; + rcu_assign_pointer(wiphy->regd, regd); goto new_request; } kfree(pending_request); - return r; + return treatment; } new_request: - if (last_request != &core_request_world) - kfree(last_request); + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); - last_request = pending_request; - last_request->intersect = intersect; + pending_request->intersect = intersect; + pending_request->processed = false; + rcu_assign_pointer(last_request, pending_request); + lr = pending_request; pending_request = NULL; - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = last_request->alpha2[0]; - user_alpha2[1] = last_request->alpha2[1]; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = lr->alpha2[0]; + user_alpha2[1] = lr->alpha2[1]; } - /* When r == REG_INTERSECT we do need to call CRDA */ - if (r < 0) { + /* When r == REG_REQ_INTERSECT we do need to call CRDA */ + if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { /* * Since CRDA will not be called in this case as we already * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) { - nl80211_send_reg_change_event(last_request); + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(lr); reg_set_request_processed(); } - return r; + return treatment; } - return call_crda(last_request->alpha2); + if (call_crda(lr->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; } /* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request, enum nl80211_reg_initiator reg_initiator) { - int r = 0; struct wiphy *wiphy = NULL; - BUG_ON(!reg_request->alpha2); + if (WARN_ON(!reg_request->alpha2)) + return; - if (wiphy_idx_valid(reg_request->wiphy_idx)) + if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && - !wiphy) { + if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - r = __regulatory_hint(wiphy, reg_request); - /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { - wiphy_update_regulatory(wiphy, reg_initiator); - return; + switch (__regulatory_hint(wiphy, reg_request)) { + case REG_REQ_ALREADY_SET: + /* This is required so that the orig_* parameters are saved */ + if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy_update_regulatory(wiphy, reg_initiator); + break; + default: + if (reg_initiator == NL80211_REGDOM_SET_BY_USER) + schedule_delayed_work(®_timeout, + msecs_to_jiffies(3142)); + break; } - - /* - * We only time out user hints, given that they should be the only - * source of bogus requests. - */ - if (r != -EALREADY && - reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); } /* @@ -1627,15 +1557,15 @@ static void reg_process_hint(struct regulatory_request *reg_request, */ static void reg_process_pending_hints(void) { - struct regulatory_request *reg_request; + struct regulatory_request *reg_request, *lr; mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); + lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ - if (last_request && !last_request->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting " - "for it to be processed...\n"); + if (lr && !lr->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); goto out; } @@ -1666,23 +1596,14 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - /* - * No need to hold the reg_mutex here as we just touch wiphys - * and do not read or access regulatory variables. - */ mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); - if (list_empty(®_pending_beacons)) { - spin_unlock_bh(®_pending_beacons_lock); - goto out; - } - list_for_each_entry_safe(pending_beacon, tmp, ®_pending_beacons, list) { - list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ @@ -1694,7 +1615,7 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); -out: + mutex_unlock(®_mutex); mutex_unlock(&cfg80211_mutex); } @@ -1706,10 +1627,8 @@ static void reg_todo(struct work_struct *work) static void queue_regulatory_request(struct regulatory_request *request) { - if (isalpha(request->alpha2[0])) - request->alpha2[0] = toupper(request->alpha2[0]); - if (isalpha(request->alpha2[1])) - request->alpha2[1] = toupper(request->alpha2[1]); + request->alpha2[0] = toupper(request->alpha2[0]); + request->alpha2[1] = toupper(request->alpha2[1]); spin_lock(®_requests_lock); list_add_tail(&request->list, ®_requests_list); @@ -1726,8 +1645,7 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1746,13 +1664,14 @@ int regulatory_hint_user(const char *alpha2, { struct regulatory_request *request; - BUG_ON(!alpha2); + if (WARN_ON(!alpha2)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; - request->wiphy_idx = WIPHY_IDX_STALE; + request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; @@ -1768,8 +1687,8 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { struct regulatory_request *request; - BUG_ON(!alpha2); - BUG_ON(!wiphy); + if (WARN_ON(!alpha2 || !wiphy)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1777,9 +1696,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->wiphy_idx = get_wiphy_idx(wiphy); - /* Must have registered wiphy first */ - BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); - request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; @@ -1794,18 +1710,17 @@ EXPORT_SYMBOL(regulatory_hint); * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. */ -void regulatory_hint_11d(struct wiphy *wiphy, - enum ieee80211_band band, - const u8 *country_ie, - u8 country_ie_len) +void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, + const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request; + struct regulatory_request *request, *lr; mutex_lock(®_mutex); + lr = get_last_request(); - if (unlikely(!last_request)) + if (unlikely(!lr)) goto out; /* IE len must be evenly divisible by 2 */ @@ -1828,9 +1743,8 @@ void regulatory_hint_11d(struct wiphy *wiphy, * We leave conflict resolution to the workqueue, where can hold * cfg80211_mutex. */ - if (likely(last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy_idx_valid(last_request->wiphy_idx))) + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -1843,12 +1757,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; - mutex_unlock(®_mutex); - queue_regulatory_request(request); - - return; - out: mutex_unlock(®_mutex); } @@ -1863,8 +1772,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("Restoring regulatory settings " - "including user preference\n"); + REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1874,26 +1782,20 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. */ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("Restoring regulatory settings " - "while preserving user preference for: %c%c\n", - user_alpha2[0], - user_alpha2[1]); + REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", + user_alpha2[0], user_alpha2[1]); alpha2[0] = user_alpha2[0]; alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else @@ -1948,7 +1850,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(true); + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); /* @@ -1958,49 +1860,35 @@ static void restore_regulatory_settings(bool reset_user) * settings. */ spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - if (reg_request->initiator != - NL80211_REGDOM_SET_BY_USER) - continue; - list_move_tail(®_request->list, &tmp_reg_req_list); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) + continue; + list_move_tail(®_request->list, &tmp_reg_req_list); } spin_unlock(®_requests_lock); /* Clear beacon hints */ spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } /* First restore to the basic regulatory settings */ - cfg80211_regdomain = cfg80211_world_regdom; - world_alpha2[0] = cfg80211_regdomain->alpha2[0]; - world_alpha2[1] = cfg80211_regdomain->alpha2[1]; + world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; + world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) restore_custom_reg_settings(&rdev->wiphy); } - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); - regulatory_hint_core(world_alpha2); /* @@ -2011,20 +1899,8 @@ static void restore_regulatory_settings(bool reset_user) if (is_an_alpha2(alpha2)) regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); - if (list_empty(&tmp_reg_req_list)) - return; - - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - spin_lock(®_requests_lock); - list_for_each_entry_safe(reg_request, tmp, &tmp_reg_req_list, list) { - REG_DBG_PRINT("Adding request for country %c%c back " - "into the queue\n", - reg_request->alpha2[0], - reg_request->alpha2[1]); - list_move_tail(®_request->list, ®_requests_list); - } + list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); mutex_unlock(®_mutex); @@ -2037,8 +1913,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("All devices are disconnected, going to " - "restore regulatory settings\n"); + REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -2051,31 +1926,48 @@ static bool freq_is_chan_12_13_14(u16 freq) return false; } +static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan) +{ + struct reg_beacon *pending_beacon; + + list_for_each_entry(pending_beacon, ®_pending_beacons, list) + if (beacon_chan->center_freq == + pending_beacon->chan.center_freq) + return true; + return false; +} + int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, gfp_t gfp) { struct reg_beacon *reg_beacon; + bool processing; - if (likely((beacon_chan->beacon_found || - (beacon_chan->flags & IEEE80211_CHAN_RADAR) || + if (beacon_chan->beacon_found || + beacon_chan->flags & IEEE80211_CHAN_RADAR || (beacon_chan->band == IEEE80211_BAND_2GHZ && - !freq_is_chan_12_13_14(beacon_chan->center_freq))))) + !freq_is_chan_12_13_14(beacon_chan->center_freq))) + return 0; + + spin_lock_bh(®_pending_beacons_lock); + processing = pending_reg_beacon(beacon_chan); + spin_unlock_bh(®_pending_beacons_lock); + + if (processing) return 0; reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("Found new beacon on " - "frequency: %d MHz (Ch %d) on %s\n", + REG_DBG_PRINT("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), wiphy_name(wiphy)); memcpy(®_beacon->chan, beacon_chan, - sizeof(struct ieee80211_channel)); - + sizeof(struct ieee80211_channel)); /* * Since we can be called from BH or and non-BH context @@ -2155,21 +2047,19 @@ static void print_dfs_region(u8 dfs_region) pr_info(" DFS Master region JP"); break; default: - pr_info(" DFS Master region Uknown"); + pr_info(" DFS Master region Unknown"); break; } } static void print_regdomain(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr = get_last_request(); if (is_intersected_alpha2(rd->alpha2)) { - - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { struct cfg80211_registered_device *rdev; - rdev = cfg80211_rdev_by_wiphy_idx( - last_request->wiphy_idx); + rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx); if (rdev) { pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], @@ -2178,22 +2068,21 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) pr_info("Current regulatory domain intersected:\n"); } else pr_info("Current regulatory domain intersected:\n"); - } else if (is_world_regdom(rd->alpha2)) + } else if (is_world_regdom(rd->alpha2)) { pr_info("World regulatory domain updated:\n"); - else { + } else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else { - if (reg_request_cell_base(last_request)) - pr_info("Regulatory domain changed " - "to country: %c%c by Cell Station\n", + if (reg_request_cell_base(lr)) + pr_info("Regulatory domain changed to country: %c%c by Cell Station\n", rd->alpha2[0], rd->alpha2[1]); else - pr_info("Regulatory domain changed " - "to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } } + print_dfs_region(rd->dfs_region); print_rd_rules(rd); } @@ -2207,22 +2096,23 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { + const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; struct wiphy *request_wiphy; + struct regulatory_request *lr = get_last_request(); + /* Some basic sanity checks first */ + if (!reg_is_valid_request(rd->alpha2)) + return -EINVAL; + if (is_world_regdom(rd->alpha2)) { - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; update_world_regdomain(rd); return 0; } if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) - return -EINVAL; - - if (!last_request) + !is_unknown_alpha2(rd->alpha2)) return -EINVAL; /* @@ -2230,7 +2120,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * rd is non static (it means CRDA was present and was used last) * and the pending request came in from a country IE */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { /* * If someone else asked us to change the rd lets only bother * checking if the alpha2 changes if CRDA was already called @@ -2246,29 +2136,23 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * internal EEPROM data */ - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; - if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy && - (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { schedule_delayed_work(®_timeout, 0); return -ENODEV; } - if (!last_request->intersect) { - int r; - - if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false); - cfg80211_regdomain = rd; + if (!lr->intersect) { + if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { + reset_regdomains(false, rd); return 0; } @@ -2284,20 +2168,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (request_wiphy->regd) return -EALREADY; - r = reg_copy_regd(&request_wiphy->regd, rd); - if (r) - return r; + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); - reset_regdomains(false); - cfg80211_regdomain = rd; + rcu_assign_pointer(request_wiphy->regd, regd); + reset_regdomains(false, rd); return 0; } /* Intersection requires a bit more work */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - - intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); if (!intersected_rd) return -EINVAL; @@ -2306,15 +2189,14 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) - request_wiphy->regd = rd; + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) + rcu_assign_pointer(request_wiphy->regd, rd); else kfree(rd); rd = NULL; - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; + reset_regdomains(false, intersected_rd); return 0; } @@ -2326,15 +2208,15 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* * Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_mutex + * kmalloc'd the rd structure. */ int set_regdom(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr; int r; - assert_cfg80211_lock(); - mutex_lock(®_mutex); + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); @@ -2343,23 +2225,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - mutex_unlock(®_mutex); - return r; + goto out; } /* This would make this whole thing pointless */ - if (!last_request->intersect) - BUG_ON(rd != cfg80211_regdomain); + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { + r = -EINVAL; + goto out; + } /* update all wiphys now with the new established regulatory domain */ - update_all_wiphy_regulatory(last_request->initiator); + update_all_wiphy_regulatory(lr->initiator); - print_regdomain(cfg80211_regdomain); + print_regdomain(get_cfg80211_regdom()); - nl80211_send_reg_change_event(last_request); + nl80211_send_reg_change_event(lr); reg_set_request_processed(); + out: mutex_unlock(®_mutex); return r; @@ -2367,20 +2251,26 @@ int set_regdom(const struct ieee80211_regdomain *rd) int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) { - if (last_request && !last_request->processed) { - if (add_uevent_var(env, "COUNTRY=%c%c", - last_request->alpha2[0], - last_request->alpha2[1])) - return -ENOMEM; + struct regulatory_request *lr; + u8 alpha2[2]; + bool add = false; + + rcu_read_lock(); + lr = get_last_request(); + if (lr && !lr->processed) { + memcpy(alpha2, lr->alpha2, 2); + add = true; } + rcu_read_unlock(); + if (add) + return add_uevent_var(env, "COUNTRY=%c%c", + alpha2[0], alpha2[1]); return 0; } void wiphy_regulatory_register(struct wiphy *wiphy) { - assert_cfg80211_lock(); - mutex_lock(®_mutex); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2395,32 +2285,32 @@ void wiphy_regulatory_register(struct wiphy *wiphy) void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr; mutex_lock(®_mutex); + lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint--; - kfree(wiphy->regd); + rcu_free_regdom(get_wiphy_regdom(wiphy)); + rcu_assign_pointer(wiphy->regd, NULL); - if (last_request) - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (lr) + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) goto out; - last_request->wiphy_idx = WIPHY_IDX_STALE; - last_request->country_ie_env = ENVIRON_ANY; + lr->wiphy_idx = WIPHY_IDX_INVALID; + lr->country_ie_env = ENVIRON_ANY; out: mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) { - REG_DBG_PRINT("Timeout while waiting for CRDA to reply, " - "restoring regulatory settings\n"); + REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); restore_regulatory_settings(true); } @@ -2439,13 +2329,13 @@ int __init regulatory_init(void) reg_regdb_size_check(); - cfg80211_regdomain = cfg80211_world_regdom; + rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; user_alpha2[1] = '7'; /* We always try to get an update for the static regdomain */ - err = regulatory_hint_core(cfg80211_regdomain->alpha2); + err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) return err; @@ -2457,10 +2347,6 @@ int __init regulatory_init(void) * errors as non-fatal. */ pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); -#ifdef CONFIG_CFG80211_REG_DEBUG - /* We want to find out exactly why when debugging */ - WARN_ON(err); -#endif } /* @@ -2474,7 +2360,7 @@ int __init regulatory_init(void) return 0; } -void /* __init_or_exit */ regulatory_exit(void) +void regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2482,43 +2368,27 @@ void /* __init_or_exit */ regulatory_exit(void) cancel_work_sync(®_work); cancel_delayed_work_sync(®_timeout); - mutex_lock(&cfg80211_mutex); + /* Lock to suppress warnings */ mutex_lock(®_mutex); - - reset_regdomains(true); + reset_regdomains(true, NULL); + mutex_unlock(®_mutex); dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); - spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - list_del(®_request->list); - kfree(reg_request); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + list_del(®_request->list); + kfree(reg_request); } - spin_unlock(®_requests_lock); - - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4c0a32ffd530..af2d5f8a5d82 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -16,10 +16,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -extern const struct ieee80211_regdomain *cfg80211_regdomain; +extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); -bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); int regulatory_hint_user(const char *alpha2, @@ -55,8 +54,8 @@ bool reg_last_request_cell_base(void); * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, - struct ieee80211_channel *beacon_chan, - gfp_t gfp); + struct ieee80211_channel *beacon_chan, + gfp_t gfp); /** * regulatory_hint_11d - hints a country IE as a regulatory domain diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 01592d7d4789..45f1618c8e23 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1358,7 +1358,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, &iwe, IW_EV_UINT_LEN); } - buf = kmalloc(30, GFP_ATOMIC); + buf = kmalloc(31, GFP_ATOMIC); if (buf) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f2431e41a373..a825dfe12cf7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -192,7 +192,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, ¶ms->crypto, + params->mfp != NL80211_MFP_NO, + ¶ms->crypto, params->flags, ¶ms->ht_capa, ¶ms->ht_capa_mask); if (err) @@ -519,10 +520,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - country_ie + 2, the start of the country ie data, and * - and country_ie[1] which is the IE length */ - regulatory_hint_11d(wdev->wiphy, - bss->channel->band, - country_ie + 2, - country_ie[1]); + regulatory_hint_11d(wdev->wiphy, bss->channel->band, + country_ie + 2, country_ie[1]); kfree(country_ie); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426e..8bc553199686 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1767,6 +1767,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WIPHY_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2f..d7873c7ae0ec 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,46 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan && + (chan->flags & IEEE80211_CHAN_RADAR)); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NL80211_IFTYPE_P2P_DEVICE: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1308,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1341,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. If they diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 257dfb18aef4..5b47180986f8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2891,7 +2891,7 @@ static void xfrm_policy_fini(struct net *net) WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); htab = &net->xfrm.policy_bydst[dir]; - sz = (htab->hmask + 1); + sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); xfrm_hash_free(htab->table, sz); } @@ -3021,10 +3021,10 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && - xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) == 0 && - xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) == 0 && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { return true; @@ -3082,10 +3082,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: - if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, - m->old_family) == 0 && - xfrm_addr_cmp(&t->saddr, &m->old_saddr, - m->old_family) == 0) { + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { match = 1; } break; @@ -3151,10 +3151,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) == 0) && - (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family) == 0)) + if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 765f6fe951eb..35754cc8a9e5 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; u32 seq = ntohl(net_seq); - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 pos; if (!replay_esn->replay_window) return; + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + if (seq > replay_esn->seq) { diff = seq - replay_esn->seq; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0adae918a7a2..ae01bdbcb294 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -691,7 +691,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -715,8 +715,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -981,8 +981,8 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) x->genid++; } } @@ -1016,8 +1016,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, x->id.spi != 0 || x->id.proto != proto || (mark & x->mark.m) != x->mark.v || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; xfrm_state_hold(x); @@ -1100,7 +1100,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || - xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; x1 = NULL; } @@ -1226,10 +1226,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) continue; if (m->reqid && x->props.reqid != m->reqid) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1241,10 +1241,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) if (x->props.mode != m->mode || x->id.proto != m->proto) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1269,7 +1269,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); /* add state */ - if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { /* a care is needed when the destination address of the state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index eb872b2e366e..fbd9e6cd0fd7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1112,7 +1112,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } |