diff options
Diffstat (limited to 'net')
36 files changed, 5151 insertions, 89 deletions
| diff --git a/net/Kconfig b/net/Kconfig index e07272d0bb2..f4e0c23be13 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -79,6 +79,20 @@ source "net/netlabel/Kconfig"  endif # if INET +config ANDROID_PARANOID_NETWORK +	bool "Only allow certain groups to create sockets" +	default n +	help +		none + +config NET_ACTIVITY_STATS +	bool "Network activity statistics tracking" +	default n +	help +	 Network activity statistics are useful for tracking wireless +	 modem activity on 2G, 3G, 4G wireless networks. Counts number of +	 transmissions and groups them in specified time buckets. +  config NETWORK_SECMARK  	bool "Security Marking"  	help @@ -218,7 +232,7 @@ source "net/batman-adv/Kconfig"  source "net/openvswitch/Kconfig"  config RPS -	boolean +	boolean "RPS"  	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS  	default y diff --git a/net/Makefile b/net/Makefile index ad432fa4d93..6865dab6af4 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_CEPH_LIB)		+= ceph/  obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/  obj-$(CONFIG_NFC)		+= nfc/  obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/ +obj-$(CONFIG_NET_ACTIVITY_STATS)		+= activity_stats.o diff --git a/net/activity_stats.c b/net/activity_stats.c new file mode 100644 index 00000000000..8a3e9347006 --- /dev/null +++ b/net/activity_stats.c @@ -0,0 +1,115 @@ +/* net/activity_stats.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * Author: Mike Chan (mike@android.com) + */ + +#include <linux/proc_fs.h> +#include <linux/suspend.h> +#include <net/net_namespace.h> + +/* + * Track transmission rates in buckets (power of 2). + * 1,2,4,8...512 seconds. + * + * Buckets represent the count of network transmissions at least + * N seconds apart, where N is 1 << bucket index. + */ +#define BUCKET_MAX 10 + +/* Track network activity frequency */ +static unsigned long activity_stats[BUCKET_MAX]; +static ktime_t last_transmit; +static ktime_t suspend_time; +static DEFINE_SPINLOCK(activity_lock); + +void activity_stats_update(void) +{ +	int i; +	unsigned long flags; +	ktime_t now; +	s64 delta; + +	spin_lock_irqsave(&activity_lock, flags); +	now = ktime_get(); +	delta = ktime_to_ns(ktime_sub(now, last_transmit)); + +	for (i = BUCKET_MAX - 1; i >= 0; i--) { +		/* +		 * Check if the time delta between network activity is within the +		 * minimum bucket range. +		 */ +		if (delta < (1000000000ULL << i)) +			continue; + +		activity_stats[i]++; +		last_transmit = now; +		break; +	} +	spin_unlock_irqrestore(&activity_lock, flags); +} + +static int activity_stats_read_proc(char *page, char **start, off_t off, +					int count, int *eof, void *data) +{ +	int i; +	int len; +	char *p = page; + +	/* Only print if offset is 0, or we have enough buffer space */ +	if (off || count < (30 * BUCKET_MAX + 22)) +		return -ENOMEM; + +	len = snprintf(p, count, "Min Bucket(sec) Count\n"); +	count -= len; +	p += len; + +	for (i = 0; i < BUCKET_MAX; i++) { +		len = snprintf(p, count, "%15d %lu\n", 1 << i, activity_stats[i]); +		count -= len; +		p += len; +	} +	*eof = 1; + +	return p - page; +} + +static int activity_stats_notifier(struct notifier_block *nb, +					unsigned long event, void *dummy) +{ +	switch (event) { +		case PM_SUSPEND_PREPARE: +			suspend_time = ktime_get_real(); +			break; + +		case PM_POST_SUSPEND: +			suspend_time = ktime_sub(ktime_get_real(), suspend_time); +			last_transmit = ktime_sub(last_transmit, 
suspend_time); +	} + +	return 0; +} + +static struct notifier_block activity_stats_notifier_block = { +	.notifier_call = activity_stats_notifier, +}; + +static int  __init activity_stats_init(void) +{ +	create_proc_read_entry("activity", S_IRUGO, +			init_net.proc_net_stat, activity_stats_read_proc, NULL); +	return register_pm_notifier(&activity_stats_notifier_block); +} + +subsys_initcall(activity_stats_init); + diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 6fb68a9743a..b9af14e8a9e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -40,6 +40,15 @@  #include <net/bluetooth/bluetooth.h> +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> +#endif + +#ifndef CONFIG_BT_SOCK_DEBUG +#undef  BT_DBG +#define BT_DBG(D...) +#endif +  #define VERSION "2.16"  /* Bluetooth sockets */ @@ -122,11 +131,40 @@ int bt_sock_unregister(int proto)  }  EXPORT_SYMBOL(bt_sock_unregister); +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static inline int current_has_bt_admin(void) +{ +	return (!current_euid() || in_egroup_p(AID_NET_BT_ADMIN)); +} + +static inline int current_has_bt(void) +{ +	return (current_has_bt_admin() || in_egroup_p(AID_NET_BT)); +} +# else +static inline int current_has_bt_admin(void) +{ +	return 1; +} + +static inline int current_has_bt(void) +{ +	return 1; +} +#endif +  static int bt_sock_create(struct net *net, struct socket *sock, int proto,  			  int kern)  {  	int err; +	if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO || +			proto == BTPROTO_L2CAP) { +		if (!current_has_bt()) +			return -EPERM; +	} else if (!current_has_bt_admin()) +		return -EPERM; +  	if (net != &init_net)  		return -EAFNOSUPPORT; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6226dc4e9ef..5abfd524c6c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -380,7 +380,7 @@ static void hci_conn_auto_accept(unsigned long arg)  }  struct hci_conn *hci_conn_add(struct hci_dev 
*hdev, int type, -					 bdaddr_t *dst) +					__u16 pkt_type, bdaddr_t *dst)  {  	struct hci_conn *conn; @@ -408,13 +408,22 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type,  		conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;  		break;  	case SCO_LINK: -		if (lmp_esco_capable(hdev)) -			conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK; -		else -			conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; -		break; +		if (!pkt_type) +			pkt_type = SCO_ESCO_MASK;  	case ESCO_LINK: -		conn->pkt_type = hdev->esco_type; +		if (!pkt_type) +			pkt_type = ALL_ESCO_MASK; +		if (lmp_esco_capable(hdev)) { +			/* HCI Setup Synchronous Connection Command uses +			   reverse logic on the EDR_ESCO_MASK bits */ +			conn->pkt_type = (pkt_type ^ EDR_ESCO_MASK) & +					hdev->esco_type; +		} else { +			/* Legacy HCI Add Sco Connection Command uses a +			   shifted bitmask */ +			conn->pkt_type = (pkt_type << 5) & hdev->pkt_type & +					SCO_PTYPE_MASK; +		}  		break;  	} @@ -532,9 +541,10 @@ EXPORT_SYMBOL(hci_get_route);  /* Create SCO, ACL or LE connection.   
* Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, -				__u8 sec_level, __u8 auth_type, -				struct bt_sco_parameters *sco_parameters) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, +					__u16 pkt_type, bdaddr_t *dst, +					__u8 sec_level, __u8 auth_type, +					struct bt_sco_parameters *sco_parameters)  {  	struct hci_conn *acl;  	struct hci_conn *sco; @@ -553,7 +563,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,  		if (!entry)  			return ERR_PTR(-EHOSTUNREACH); -		le = hci_conn_add(hdev, LE_LINK, dst); +		le = hci_conn_add(hdev, LE_LINK, 0, dst);  		if (!le)  			return ERR_PTR(-ENOMEM); @@ -568,7 +578,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,  	acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);  	if (!acl) { -		acl = hci_conn_add(hdev, ACL_LINK, dst); +		acl = hci_conn_add(hdev, ACL_LINK, 0, dst);  		if (!acl)  			return ERR_PTR(-ENOMEM);  	} @@ -587,7 +597,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,  	sco = hci_conn_hash_lookup_ba(hdev, type, dst);  	if (!sco) { -		sco = hci_conn_add(hdev, type, dst); +		sco = hci_conn_add(hdev, type, pkt_type, dst);  		if (!sco) {  			hci_conn_put(acl);  			return ERR_PTR(-ENOMEM); @@ -893,6 +903,15 @@ int hci_get_conn_list(void __user *arg)  		(ci + n)->out   = c->out;  		(ci + n)->state = c->state;  		(ci + n)->link_mode = c->link_mode; +		if (c->type == SCO_LINK) { +			(ci + n)->mtu = hdev->sco_mtu; +			(ci + n)->cnt = hdev->sco_cnt; +			(ci + n)->pkts = hdev->sco_pkts; +		} else { +			(ci + n)->mtu = hdev->acl_mtu; +			(ci + n)->cnt = hdev->acl_cnt; +			(ci + n)->pkts = hdev->acl_pkts; +		}  		if (++n >= req.conn_num)  			break;  	} @@ -929,6 +948,15 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)  		ci.out   = conn->out;  		ci.state = conn->state;  		ci.link_mode = conn->link_mode; +		if (req.type == SCO_LINK) { +			ci.mtu = 
hdev->sco_mtu; +			ci.cnt = hdev->sco_cnt; +			ci.pkts = hdev->sco_pkts; +		} else { +			ci.mtu = hdev->acl_mtu; +			ci.cnt = hdev->acl_cnt; +			ci.pkts = hdev->acl_pkts; +		}  	}  	hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 334b51a4d75..7d133ec38bd 100644..100755 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1213,7 +1213,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)  		}  	} else {  		if (!conn) { -			conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr); +			conn = hci_conn_add(hdev, ACL_LINK, 0, &cp->bdaddr);  			if (conn) {  				conn->out = true;  				conn->link_mode |= HCI_LM_MASTER; @@ -1630,7 +1630,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)  		}  	} else {  		if (!conn) { -			conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); +			conn = hci_conn_add(hdev, LE_LINK, 0, &cp->peer_addr);  			if (conn) {  				conn->dst_type = cp->peer_addr_type;  				conn->out = true; @@ -1800,6 +1800,15 @@ unlock:  	hci_conn_check_pending(hdev);  } +static inline bool is_sco_active(struct hci_dev *hdev) +{ +	if (hci_conn_hash_lookup_state(hdev, SCO_LINK, BT_CONNECTED) || +			(hci_conn_hash_lookup_state(hdev, ESCO_LINK, +						    BT_CONNECTED))) +		return true; +	return false; +} +  static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)  {  	struct hci_ev_conn_request *ev = (void *) skb->data; @@ -1825,7 +1834,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk  		conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);  		if (!conn) {  			/* pkt_type not yet used for incoming connections */ -			conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr); +			conn = hci_conn_add(hdev, ev->link_type, 0, &ev->bdaddr);  			if (!conn) {  				BT_ERR("No memory for new connection");  				hci_dev_unlock(hdev); @@ -1843,7 +1852,8 @@ static inline void hci_conn_request_evt(struct hci_dev 
*hdev, struct sk_buff *sk  			bacpy(&cp.bdaddr, &ev->bdaddr); -			if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) +			if (lmp_rswitch_capable(hdev) && ((mask & HCI_LM_MASTER) +						|| is_sco_active(hdev)))  				cp.role = 0x00; /* Become master */  			else  				cp.role = 0x01; /* Remain slave */ @@ -2933,6 +2943,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu  		hci_conn_add_sysfs(conn);  		break; +	case 0x10:	/* Connection Accept Timeout */  	case 0x11:	/* Unsupported Feature or Parameter Value */  	case 0x1c:	/* SCO interval rejected */  	case 0x1a:	/* Unsupported Remote Feature */ @@ -3277,7 +3288,7 @@ static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff  	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);  	if (!conn) { -		conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); +		conn = hci_conn_add(hdev, LE_LINK, 0, &ev->bdaddr);  		if (!conn) {  			BT_ERR("No memory for new connection");  			hci_dev_unlock(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 25dd22767ce..0f84b5661ee 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1218,10 +1218,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *d  	auth_type = l2cap_get_auth_type(chan);  	if (chan->dcid == L2CAP_CID_LE_DATA) -		hcon = hci_connect(hdev, LE_LINK, dst, +		hcon = hci_connect(hdev, LE_LINK, 0, dst,  					chan->sec_level, auth_type, NULL);  	else -		hcon = hci_connect(hdev, ACL_LINK, dst, +		hcon = hci_connect(hdev, ACL_LINK, 0, dst,  					chan->sec_level, auth_type, NULL);  	if (IS_ERR(hcon)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 93a19683702..6ce44bc4b85 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1891,10 +1891,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,  		auth_type = HCI_AT_DEDICATED_BONDING_MITM;  	if (cp->addr.type == MGMT_ADDR_BREDR) -		conn = 
hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr, sec_level, +		conn = hci_connect(hdev, ACL_LINK, 0, &cp->addr.bdaddr, sec_level,  				   auth_type, NULL);  	else -		conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, sec_level, +		conn = hci_connect(hdev, LE_LINK, 0, &cp->addr.bdaddr, sec_level,  				   auth_type, NULL);  	memset(&rp, 0, sizeof(rp)); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 8a602388f1e..9652707b1a0 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -462,7 +462,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)  	switch (d->state) {  	case BT_CONNECT: -	case BT_CONFIG:  		if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {  			set_bit(RFCOMM_AUTH_REJECT, &d->flags);  			rfcomm_schedule(); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 33d88ebd063..cf0301ce2f5 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -178,6 +178,7 @@ static int sco_connect(struct sock *sk)  	bdaddr_t *src = &bt_sk(sk)->src;  	bdaddr_t *dst = &bt_sk(sk)->dst;  	struct bt_sco_parameters *param = &sco_pi(sk)->param; +	__u16 pkt_type = sco_pi(sk)->pkt_type;  	struct sco_conn *conn;  	struct hci_conn *hcon;  	struct hci_dev  *hdev; @@ -193,11 +194,12 @@ static int sco_connect(struct sock *sk)  	if (lmp_esco_capable(hdev) && !disable_esco)  		type = ESCO_LINK; -	else +	else {  		type = SCO_LINK; +		pkt_type &= SCO_ESCO_MASK; +	} -	hcon = hci_connect(hdev, type, dst, -				BT_SECURITY_LOW, HCI_AT_NO_BONDING, param); +	hcon = hci_connect(hdev, type, pkt_type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING, param);  	if (IS_ERR(hcon)) {  		err = PTR_ERR(hcon);  		goto done; @@ -480,18 +482,22 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,  	return 0;  } -static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)  { -	struct sockaddr_sco *sa = (struct 
sockaddr_sco *) addr; +	struct sockaddr_sco sa;  	struct sock *sk = sock->sk; -	bdaddr_t *src = &sa->sco_bdaddr; -	int err = 0; +	bdaddr_t *src = &sa.sco_bdaddr; +	int len, err = 0; -	BT_DBG("sk %p %s", sk, batostr(&sa->sco_bdaddr)); +	BT_DBG("sk %p %s", sk, batostr(&sa.sco_bdaddr));  	if (!addr || addr->sa_family != AF_BLUETOOTH)  		return -EINVAL; +	memset(&sa, 0, sizeof(sa)); +	len = min_t(unsigned int, sizeof(sa), alen); +	memcpy(&sa, addr, len); +  	lock_sock(sk);  	if (sk->sk_state != BT_OPEN) { @@ -505,7 +511,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le  		err = -EADDRINUSE;  	} else {  		/* Save source address */ -		bacpy(&bt_sk(sk)->src, &sa->sco_bdaddr); +		bacpy(&bt_sk(sk)->src, &sa.sco_bdaddr); +		sco_pi(sk)->pkt_type = sa.sco_pkt_type;  		sk->sk_state = BT_BOUND;  	} @@ -518,27 +525,34 @@ done:  static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)  { -	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;  	struct sock *sk = sock->sk; -	int err = 0; - +	struct sockaddr_sco sa; +	int len, err = 0;  	BT_DBG("sk %p", sk); -	if (alen < sizeof(struct sockaddr_sco) || -	    addr->sa_family != AF_BLUETOOTH) +	if (!addr || addr->sa_family != AF_BLUETOOTH)  		return -EINVAL; -	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) -		return -EBADFD; - -	if (sk->sk_type != SOCK_SEQPACKET) -		return -EINVAL; +	memset(&sa, 0, sizeof(sa)); +	len = min_t(unsigned int, sizeof(sa), alen); +	memcpy(&sa, addr, len);  	lock_sock(sk); +	if (sk->sk_type != SOCK_SEQPACKET) { +		err = -EINVAL; +		goto done; +	} + +	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { +		err = -EBADFD; +		goto done; +	} +  	/* Set destination address and psm */ -	bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr); +	bacpy(&bt_sk(sk)->dst, &sa.sco_bdaddr); +	sco_pi(sk)->pkt_type = sa.sco_pkt_type;  	err = sco_connect(sk);  	if (err) @@ -645,6 +659,7 @@ static int sco_sock_getname(struct socket *sock, struct 
sockaddr *addr, int *len  		bacpy(&sa->sco_bdaddr, &bt_sk(sk)->dst);  	else  		bacpy(&sa->sco_bdaddr, &bt_sk(sk)->src); +	sa->sco_pkt_type = sco_pi(sk)->pkt_type;  	return 0;  } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ba829de8442..9ec0822f440 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -38,16 +38,17 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)  	}  #endif -	u64_stats_update_begin(&brstats->syncp); -	brstats->tx_packets++; -	brstats->tx_bytes += skb->len; -	u64_stats_update_end(&brstats->syncp); -  	BR_INPUT_SKB_CB(skb)->brdev = dev;  	skb_reset_mac_header(skb);  	skb_pull(skb, ETH_HLEN); +	u64_stats_update_begin(&brstats->syncp); +	brstats->tx_packets++; +	/* Exclude ETH_HLEN from byte stats for consistency with Rx chain */ +	brstats->tx_bytes += skb->len; +	u64_stats_update_end(&brstats->syncp); +  	rcu_read_lock();  	if (is_broadcast_ether_addr(dest))  		br_flood_deliver(br, skb); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ff75d3bbcd6..c6f177cf713 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,6 +14,7 @@ obj-y     := route.o inetpeer.o protocol.o \  	     inet_fragment.o ping.o  obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o +obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o  obj-$(CONFIG_PROC_FS) += proc.o  obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o  obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 10e3751466b..0b711659ac7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,19 @@  #include <linux/mroute.h>  #endif +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> + +static inline int current_has_network(void) +{ +	return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ +	return 1; +} +#endif  /* The inetsw table contains everything that inet_create needs to   * build a new socket. 
@@ -259,6 +272,7 @@ static inline int inet_netns_ok(struct net *net, int protocol)  	return ipprot->netns_ok;  } +  /*   *	Create an inet socket.   */ @@ -275,6 +289,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,  	int try_loading_module = 0;  	int err; +	if (!current_has_network()) +		return -EACCES; +  	if (unlikely(!inet_ehash_secret))  		if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)  			build_ehash_secret(); @@ -881,6 +898,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)  	case SIOCSIFPFLAGS:  	case SIOCGIFPFLAGS:  	case SIOCSIFFLAGS: +	case SIOCKILLADDR:  		err = devinet_ioctl(net, cmd, (void __user *)arg);  		break;  	default: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6e447ff94df..8a9aab37f0a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@  #include <net/arp.h>  #include <net/ip.h> +#include <net/tcp.h>  #include <net/route.h>  #include <net/ip_fib.h>  #include <net/rtnetlink.h> @@ -734,6 +735,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)  	case SIOCSIFBRDADDR:	/* Set the broadcast address */  	case SIOCSIFDSTADDR:	/* Set the destination address */  	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */ +	case SIOCKILLADDR:	/* Nuke all sockets on this address */  		ret = -EACCES;  		if (!capable(CAP_NET_ADMIN))  			goto out; @@ -785,7 +787,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)  	}  	ret = -EADDRNOTAVAIL; -	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) +	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS +	    && cmd != SIOCKILLADDR)  		goto done;  	switch (cmd) { @@ -911,6 +914,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)  			inet_insert_ifa(ifa);  		}  		break; +	case SIOCKILLADDR:	/* Nuke all connections on this address */ +		ret = tcp_nuke_addr(net, (struct sockaddr *) sin); +		break;  	}  done:  	rtnl_unlock(); diff --git 
a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987..a1dce8a25b4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -123,6 +123,18 @@ config IP_NF_TARGET_REJECT  	  To compile it as a module, choose M here.  If unsure, say N. +config IP_NF_TARGET_REJECT_SKERR +	bool "Force socket error when rejecting with icmp*" +	depends on IP_NF_TARGET_REJECT +	default n +	help +          This option enables turning a "--reject-with icmp*" into a matching +          socket error also. +	  The REJECT target normally allows sending an ICMP message. But it +          leaves the local socket unaware of any ingress rejects. + +	  If unsure, say N. +  config IP_NF_TARGET_ULOG  	tristate "ULOG target support"  	default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8ec72..9dd754c7f2b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -128,6 +128,14 @@ static void send_reset(struct sk_buff *oldskb, int hook)  static inline void send_unreach(struct sk_buff *skb_in, int code)  {  	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +#ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR +	if (skb_in->sk) { +		skb_in->sk->sk_err = icmp_err_convert[code].errno; +		skb_in->sk->sk_error_report(skb_in->sk); +		pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n", +			skb_in->sk->sk_err, skb_in, skb_in->sk); +	} +#endif  }  static unsigned int diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c new file mode 100644 index 00000000000..0cbbf10026a --- /dev/null +++ b/net/ipv4/sysfs_net_ipv4.c @@ -0,0 +1,88 @@ +/* + * net/ipv4/sysfs_net_ipv4.c + * + * sysfs-based networking knobs (so we can, unlike with sysctl, control perms) + * + * Copyright (C) 2008 Google, Inc. 
+ * + * Robert Love <rlove@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + */ + +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/init.h> +#include <net/tcp.h> + +#define CREATE_IPV4_FILE(_name, _var) \ +static ssize_t _name##_show(struct kobject *kobj, \ +			    struct kobj_attribute *attr, char *buf) \ +{ \ +	return sprintf(buf, "%d\n", _var); \ +} \ +static ssize_t _name##_store(struct kobject *kobj, \ +			     struct kobj_attribute *attr, \ +			     const char *buf, size_t count) \ +{ \ +	int val, ret; \ +	ret = sscanf(buf, "%d", &val); \ +	if (ret != 1) \ +		return -EINVAL; \ +	if (val < 0) \ +		return -EINVAL; \ +	_var = val; \ +	return count; \ +} \ +static struct kobj_attribute _name##_attr = \ +	__ATTR(_name, 0644, _name##_show, _name##_store) + +CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]); +CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]); +CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]); + +CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]); +CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]); +CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]); + +static struct attribute *ipv4_attrs[] = { +	&tcp_wmem_min_attr.attr, +	&tcp_wmem_def_attr.attr, +	&tcp_wmem_max_attr.attr, +	&tcp_rmem_min_attr.attr, +	&tcp_rmem_def_attr.attr, +	&tcp_rmem_max_attr.attr, +	NULL +}; + +static struct attribute_group ipv4_attr_group = { +	.attrs = ipv4_attrs, +}; + +static __init int sysfs_ipv4_init(void) +{ +	struct kobject *ipv4_kobject; +	int ret; + +	ipv4_kobject = 
kobject_create_and_add("ipv4", kernel_kobj); +	if (!ipv4_kobject) +		return -ENOMEM; + +	ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group); +	if (ret) { +		kobject_put(ipv4_kobject); +		return ret; +	} + +	return 0; +} + +subsys_initcall(sysfs_ipv4_init); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6589e11d57b..cf54e10b258 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -268,11 +268,15 @@  #include <linux/crypto.h>  #include <linux/time.h>  #include <linux/slab.h> +#include <linux/uid_stat.h>  #include <net/icmp.h>  #include <net/tcp.h>  #include <net/xfrm.h>  #include <net/ip.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <net/transp_v6.h>  #include <net/netdma.h>  #include <net/sock.h> @@ -1115,6 +1119,9 @@ out:  	if (copied)  		tcp_push(sk, flags, mss_now, tp->nonagle);  	release_sock(sk); + +	if (copied > 0) +		uid_stat_tcp_snd(current_uid(), copied);  	return copied;  do_fault: @@ -1389,8 +1396,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,  	tcp_rcv_space_adjust(sk);  	/* Clean up data we have read: This will do ACK frames. 
*/ -	if (copied > 0) +	if (copied > 0) {  		tcp_cleanup_rbuf(sk, copied); +		uid_stat_tcp_rcv(current_uid(), copied); +	} +  	return copied;  }  EXPORT_SYMBOL(tcp_read_sock); @@ -1773,6 +1783,9 @@ skip_copy:  	tcp_cleanup_rbuf(sk, copied);  	release_sock(sk); + +	if (copied > 0) +		uid_stat_tcp_rcv(current_uid(), copied);  	return copied;  out: @@ -1781,6 +1794,8 @@ out:  recv_urg:  	err = tcp_recv_urg(sk, msg, len, flags); +	if (err > 0) +		uid_stat_tcp_rcv(current_uid(), err);  	goto out;  }  EXPORT_SYMBOL(tcp_recvmsg); @@ -3327,3 +3342,107 @@ void __init tcp_init(void)  	tcp_secret_retiring = &tcp_secret_two;  	tcp_secret_secondary = &tcp_secret_two;  } + +static int tcp_is_local(struct net *net, __be32 addr) { +	struct rtable *rt; +	struct flowi4 fl4 = { .daddr = addr }; +	rt = ip_route_output_key(net, &fl4); +	if (IS_ERR_OR_NULL(rt)) +		return 0; +	return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int tcp_is_local6(struct net *net, struct in6_addr *addr) { +	struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); +	return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); +} +#endif + +/* + * tcp_nuke_addr - destroy all sockets on the given local address + * if local address is the unspecified address (0.0.0.0 or ::), destroy all + * sockets with local addresses that are not configured. 
+ */ +int tcp_nuke_addr(struct net *net, struct sockaddr *addr) +{ +	int family = addr->sa_family; +	unsigned int bucket; + +	struct in_addr *in; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +	struct in6_addr *in6; +#endif +	if (family == AF_INET) { +		in = &((struct sockaddr_in *)addr)->sin_addr; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +	} else if (family == AF_INET6) { +		in6 = &((struct sockaddr_in6 *)addr)->sin6_addr; +#endif +	} else { +		return -EAFNOSUPPORT; +	} + +	for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { +		struct hlist_nulls_node *node; +		struct sock *sk; +		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); + +restart: +		spin_lock_bh(lock); +		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { +			struct inet_sock *inet = inet_sk(sk); + +			if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) +				continue; +			if (sock_flag(sk, SOCK_DEAD)) +				continue; + +			if (family == AF_INET) { +				__be32 s4 = inet->inet_rcv_saddr; +				if (s4 == LOOPBACK4_IPV6) +					continue; + +				if (in->s_addr != s4 && +				    !(in->s_addr == INADDR_ANY && +				      !tcp_is_local(net, s4))) +					continue; +			} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +			if (family == AF_INET6) { +				struct in6_addr *s6; +				if (!inet->pinet6) +					continue; + +				s6 = &inet->pinet6->rcv_saddr; +				if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) +					continue; + +				if (!ipv6_addr_equal(in6, s6) && +				    !(ipv6_addr_equal(in6, &in6addr_any) && +				      !tcp_is_local6(net, s6))) +				continue; +			} +#endif + +			sock_hold(sk); +			spin_unlock_bh(lock); + +			local_bh_disable(); +			bh_lock_sock(sk); +			sk->sk_err = ETIMEDOUT; +			sk->sk_error_report(sk); + +			tcp_done(sk); +			bh_unlock_sock(sk); +			local_bh_enable(); +			sock_put(sk); + +			goto restart; +		} +		spin_unlock_bh(lock); +	} + +	return 0; +} diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 
8ed1b930e75..29625e9a51a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,20 @@  #include <asm/uaccess.h>  #include <linux/mroute6.h> +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> + +static inline int current_has_network(void) +{ +	return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ +	return 1; +} +#endif +  MODULE_AUTHOR("Cast of dozens");  MODULE_DESCRIPTION("IPv6 protocol stack for Linux");  MODULE_LICENSE("GPL"); @@ -108,6 +122,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,  	int try_loading_module = 0;  	int err; +	if (!current_has_network()) +		return -EACCES; +  	if (sock->type != SOCK_RAW &&  	    sock->type != SOCK_DGRAM &&  	    !inet_ehash_secret) @@ -477,6 +494,21 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,  EXPORT_SYMBOL(inet6_getname); +int inet6_killaddr_ioctl(struct net *net, void __user *arg) { +	struct in6_ifreq ireq; +	struct sockaddr_in6 sin6; + +	if (!capable(CAP_NET_ADMIN)) +		return -EACCES; + +	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +		return -EFAULT; + +	sin6.sin6_family = AF_INET6; +	sin6.sin6_addr = ireq.ifr6_addr; +	return tcp_nuke_addr(net, (struct sockaddr *) &sin6); +} +  int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)  {  	struct sock *sk = sock->sk; @@ -501,6 +533,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)  		return addrconf_del_ifaddr(net, (void __user *) arg);  	case SIOCSIFDSTADDR:  		return addrconf_set_dstaddr(net, (void __user *) arg); +	case SIOCKILLADDR: +		return inet6_killaddr_ioctl(net, (void __user *) arg);  	default:  		if (!sk->sk_prot->ioctl)  			return -ENOIOCTLCMD; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d33cddd16fb..acd7c9e1da4 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -175,6 +175,18 @@ config 
IP6_NF_TARGET_REJECT  	  To compile it as a module, choose M here.  If unsure, say N. +config IP6_NF_TARGET_REJECT_SKERR +	bool "Force socket error when rejecting with icmp*" +	depends on IP6_NF_TARGET_REJECT +	default n +	help +          This option enables turning a "--reject-with icmp*" into a matching +          socket error also. +	  The REJECT target normally allows sending an ICMP message. But it +          leaves the local socket unaware of any ingress rejects. + +	  If unsure, say N. +  config IP6_NF_MANGLE  	tristate "Packet mangling"  	default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9d4e1555931..e641f8fa748 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2279,16 +2279,15 @@ static void __exit ip6_tables_fini(void)   * "No next header".   *   * If target header is found, its offset is set in *offset and return protocol - * number. Otherwise, return -1. + * number. Otherwise, return -ENOENT or -EBADMSG.   *   * If the first fragment doesn't contain the final protocol header or   * NEXTHDR_NONE it is considered invalid.   *   * Note that non-1st fragment is special case that "the protocol number   * of last header" is "next header" field in Fragment header. In this case, - * *offset is meaningless and fragment offset is stored in *fragoff if fragoff - * isn't NULL. - * + * *offset is meaningless. If fragoff is not NULL, the fragment offset is + * stored in *fragoff; if it is NULL, return -EINVAL.   
*/  int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,  		  int target, unsigned short *fragoff) @@ -2329,9 +2328,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,  				if (target < 0 &&  				    ((!ipv6_ext_hdr(hp->nexthdr)) ||  				     hp->nexthdr == NEXTHDR_NONE)) { -					if (fragoff) +					if (fragoff) {  						*fragoff = _frag_off; -					return hp->nexthdr; +						return hp->nexthdr; +					} else { +						return -EINVAL; +					}  				}  				return -ENOENT;  			} diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index aad2fa41cf4..09155e34458 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -178,6 +178,15 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,  		skb_in->dev = net->loopback_dev;  	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR +	if (skb_in->sk) { +		icmpv6_err_convert(ICMPV6_DEST_UNREACH, code, +				   &skb_in->sk->sk_err); +		skb_in->sk->sk_error_report(skb_in->sk); +		pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n", +			skb_in->sk->sk_err, skb_in, skb_in->sk); +	} +#endif  }  static unsigned int diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0c6f67e8f2e..ce2976c0ce7 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -967,6 +967,8 @@ config NETFILTER_XT_MATCH_OWNER  	based on who created the socket: the user or group. It is also  	possible to check whether a socket actually exists. +	Conflicts with '"quota, tag, uid" match' +  config NETFILTER_XT_MATCH_POLICY  	tristate 'IPsec "policy" match support'  	depends on XFRM @@ -1000,6 +1002,22 @@ config NETFILTER_XT_MATCH_PKTTYPE  	  To compile it as a module, choose M here.  If unsure, say N. 
+config NETFILTER_XT_MATCH_QTAGUID +	bool '"quota, tag, owner" match and stats support' +        depends on NETFILTER_XT_MATCH_SOCKET +	depends on NETFILTER_XT_MATCH_OWNER=n +	help +	  This option replaces the `owner' match. In addition to matching +	  on uid, it keeps stats based on a tag assigned to a socket. +	  The full tag is comprised of a UID and an accounting tag. +	  The tags are assignable to sockets from user space (e.g. a download +	  manager can assign the socket to another UID for accounting). +	  Stats and control are done via /proc/net/xt_qtaguid/. +	  It replaces owner as it takes the same arguments, but should +	  really be recognized by the iptables tool. + +	  If unsure, say `N'. +  config NETFILTER_XT_MATCH_QUOTA  	tristate '"quota" match support'  	depends on NETFILTER_ADVANCED @@ -1010,6 +1028,30 @@ config NETFILTER_XT_MATCH_QUOTA  	  If you want to compile it as a module, say M here and read  	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'. +config NETFILTER_XT_MATCH_QUOTA2 +	tristate '"quota2" match support' +	depends on NETFILTER_ADVANCED +	help +	  This option adds a `quota2' match, which allows to match on a +	  byte counter correctly and not per CPU. +	  It allows naming the quotas. +	  This is based on http://xtables-addons.git.sourceforge.net + +	  If you want to compile it as a module, say M here and read +	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'. + +config NETFILTER_XT_MATCH_QUOTA2_LOG +	bool '"quota2" Netfilter LOG support' +	depends on NETFILTER_XT_MATCH_QUOTA2 +	depends on IP_NF_TARGET_ULOG=n    # not yes, not module, just no +	default n +	help +	  This option allows `quota2' to log ONCE when a quota limit +	  is passed. It logs via NETLINK using the NETLINK_NFLOG family. +	  It logs similarly to how ipt_ULOG would without data. + +	  If unsure, say `N'. 
+  config NETFILTER_XT_MATCH_RATEEST  	tristate '"rateest" match support'  	depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca3676586f5..452e84de7e7 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -101,7 +101,9 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o  obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o  obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o  obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o  obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o  obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o  obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o  obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f407ebc1348..df91e26f55f 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -5,6 +5,7 @@   * After timer expires a kevent will be sent.   
*   * Copyright (C) 2004, 2010 Nokia Corporation + *   * Written by Timo Teras <ext-timo.teras@nokia.com>   *   * Converted to x_tables and reworked for upstream inclusion @@ -38,8 +39,10 @@  #include <linux/netfilter/xt_IDLETIMER.h>  #include <linux/kdev_t.h>  #include <linux/kobject.h> +#include <linux/skbuff.h>  #include <linux/workqueue.h>  #include <linux/sysfs.h> +#include <net/net_namespace.h>  struct idletimer_tg_attr {  	struct attribute attr; @@ -56,6 +59,8 @@ struct idletimer_tg {  	struct idletimer_tg_attr attr;  	unsigned int refcnt; +	bool send_nl_msg; +	bool active;  };  static LIST_HEAD(idletimer_tg_list); @@ -63,6 +68,32 @@ static DEFINE_MUTEX(list_mutex);  static struct kobject *idletimer_tg_kobj; +static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) +{ +	char iface_msg[NLMSG_MAX_SIZE]; +	char state_msg[NLMSG_MAX_SIZE]; +	char *envp[] = { iface_msg, state_msg, NULL }; +	int res; + +	res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", +		       iface); +	if (NLMSG_MAX_SIZE <= res) { +		pr_err("message too long (%d)", res); +		return; +	} +	res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", +		       timer->active ? 
"active" : "inactive"); +	if (NLMSG_MAX_SIZE <= res) { +		pr_err("message too long (%d)", res); +		return; +	} +	pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); +	kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); +	return; + + +} +  static  struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)  { @@ -83,6 +114,7 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,  {  	struct idletimer_tg *timer;  	unsigned long expires = 0; +	unsigned long now = jiffies;  	mutex_lock(&list_mutex); @@ -92,11 +124,15 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,  	mutex_unlock(&list_mutex); -	if (time_after(expires, jiffies)) +	if (time_after(expires, now))  		return sprintf(buf, "%u\n", -			       jiffies_to_msecs(expires - jiffies) / 1000); +			       jiffies_to_msecs(expires - now) / 1000); -	return sprintf(buf, "0\n"); +	if (timer->send_nl_msg) +		return sprintf(buf, "0 %d\n", +			jiffies_to_msecs(now - expires) / 1000); +	else +		return sprintf(buf, "0\n");  }  static void idletimer_tg_work(struct work_struct *work) @@ -105,6 +141,9 @@ static void idletimer_tg_work(struct work_struct *work)  						  work);  	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); + +	if (timer->send_nl_msg) +		notify_netlink_uevent(timer->attr.attr.name, timer);  }  static void idletimer_tg_expired(unsigned long data) @@ -113,6 +152,7 @@ static void idletimer_tg_expired(unsigned long data)  	pr_debug("timer %s expired\n", timer->attr.attr.name); +	timer->active = false;  	schedule_work(&timer->work);  } @@ -145,6 +185,8 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)  	setup_timer(&info->timer->timer, idletimer_tg_expired,  		    (unsigned long) info->timer);  	info->timer->refcnt = 1; +	info->timer->send_nl_msg = (info->send_nl_msg == 0) ? 
false : true; +	info->timer->active = true;  	mod_timer(&info->timer->timer,  		  msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -168,14 +210,24 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,  					 const struct xt_action_param *par)  {  	const struct idletimer_tg_info *info = par->targinfo; +	unsigned long now = jiffies;  	pr_debug("resetting timer %s, timeout period %u\n",  		 info->label, info->timeout);  	BUG_ON(!info->timer); +	info->timer->active = true; + +	if (time_before(info->timer->timer.expires, now)) { +		schedule_work(&info->timer->work); +		pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n", +			 info->label, info->timer->timer.expires, now); +	} + +	/* TODO: Avoid modifying timers on each packet */  	mod_timer(&info->timer->timer, -		  msecs_to_jiffies(info->timeout * 1000) + jiffies); +		  msecs_to_jiffies(info->timeout * 1000) + now);  	return XT_CONTINUE;  } @@ -184,8 +236,9 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)  {  	struct idletimer_tg_info *info = par->targinfo;  	int ret; +	unsigned long now = jiffies; -	pr_debug("checkentry targinfo%s\n", info->label); +	pr_debug("checkentry targinfo %s\n", info->label);  	if (info->timeout == 0) {  		pr_debug("timeout value is zero\n"); @@ -204,8 +257,16 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)  	info->timer = __idletimer_tg_find_by_label(info->label);  	if (info->timer) {  		info->timer->refcnt++; +		info->timer->active = true; + +		if (time_before(info->timer->timer.expires, now)) { +			schedule_work(&info->timer->work); +			pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", +				info->timer->timer.expires, now); +		} +  		mod_timer(&info->timer->timer, -			  msecs_to_jiffies(info->timeout * 1000) + jiffies); +			  msecs_to_jiffies(info->timeout * 1000) + now);  		pr_debug("increased refcnt of timer %s to %u\n",  			 info->label, info->timer->refcnt); @@ -219,6 +280,7 @@ static int 
idletimer_tg_checkentry(const struct xt_tgchk_param *par)  	}  	mutex_unlock(&list_mutex); +  	return 0;  } @@ -240,7 +302,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)  		kfree(info->timer);  	} else {  		pr_debug("decreased refcnt of timer %s to %u\n", -			 info->label, info->timer->refcnt); +		info->label, info->timer->refcnt);  	}  	mutex_unlock(&list_mutex); @@ -248,6 +310,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)  static struct xt_target idletimer_tg __read_mostly = {  	.name		= "IDLETIMER", +	.revision	= 1,  	.family		= NFPROTO_UNSPEC,  	.target		= idletimer_tg_target,  	.targetsize     = sizeof(struct idletimer_tg_info), @@ -313,3 +376,4 @@ MODULE_DESCRIPTION("Xtables: idle time monitor");  MODULE_LICENSE("GPL v2");  MODULE_ALIAS("ipt_IDLETIMER");  MODULE_ALIAS("ip6t_IDLETIMER"); +MODULE_ALIAS("arpt_IDLETIMER"); diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c new file mode 100644 index 00000000000..f6d4cfc05f3 --- /dev/null +++ b/net/netfilter/xt_qtaguid.c @@ -0,0 +1,2976 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * There are run-time debug flags enabled via the debug_mask module param, or + * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. 
+ */ +#define DEBUG + +#include <linux/file.h> +#include <linux/inetdevice.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_qtaguid.h> +#include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/addrconf.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <net/udp.h> + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#include <linux/netfilter_ipv6/ip6_tables.h> +#endif + +#include <linux/netfilter/xt_socket.h> +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +/* + * We only use the xt_socket funcs within a similar context to avoid unexpected + * return values. + */ +#define XT_SOCKET_SUPPORTED_HOOKS \ +	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) + + +static const char *module_procdirname = "xt_qtaguid"; +static struct proc_dir_entry *xt_qtaguid_procdir; + +static unsigned int proc_iface_perms = S_IRUGO; +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_stats_file; +static unsigned int proc_stats_perms = S_IRUGO; +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_ctrl_file; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; +#else +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; +#endif +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); + +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include <linux/android_aid.h> +static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; +static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; +#else +/* 0 means, don't limit anybody */ +static gid_t proc_stats_readall_gid; +static gid_t proc_ctrl_write_gid; +#endif +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, +		   S_IRUGO | S_IWUSR); +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, +		   S_IRUGO | S_IWUSR); + 
+/* + * Limit the number of active tags (via socket tags) for a given UID. + * Multiple processes could share the UID. + */ +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); + +/* + * After the kernel has initialized this module, it is still possible + * to make it passive. + * Setting passive to Y: + *  - the iface stats handling will not act on notifications. + *  - iptables matches will never match. + *  - ctrl commands silently succeed. + *  - stats are always empty. + * This is mostly useful when a bug is suspected. + */ +static bool module_passive; +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); + +/* + * Control how qtaguid data is tracked per proc/uid. + * Setting tag_tracking_passive to Y: + *  - don't create proc specific structs to track tags + *  - don't check that active tag stats exceed some limits. + *  - don't clean up socket tags on process exits. + * This is mostly useful when a bug is suspected. + */ +static bool qtu_proc_handling_passive; +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, +		   S_IRUGO | S_IWUSR); + +#define QTU_DEV_NAME "xt_qtaguid" + +uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; +module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); + +/*---------------------------------------------------------------------------*/ +static const char *iface_stat_procdirname = "iface_stat"; +static struct proc_dir_entry *iface_stat_procdir; +/* + * The iface_stat_all* will go away once userspace gets used to the new fields + * that have a format line. 
+ */ +static const char *iface_stat_all_procfilename = "iface_stat_all"; +static struct proc_dir_entry *iface_stat_all_procfile; +static const char *iface_stat_fmt_procfilename = "iface_stat_fmt"; +static struct proc_dir_entry *iface_stat_fmt_procfile; + + +/* + * Ordering of locks: + *  outer locks: + *    iface_stat_list_lock + *    sock_tag_list_lock + *  inner locks: + *    uid_tag_data_tree_lock + *    tag_counter_set_list_lock + * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock + * is acquired. + * + * Call tree with all lock holders as of 2012-04-27: + * + * iface_stat_fmt_proc_read() + *   iface_stat_list_lock + *     (struct iface_stat) + * + * qtaguid_ctrl_proc_read() + *   sock_tag_list_lock + *     (sock_tag_tree) + *     (struct proc_qtu_data->sock_tag_list) + *   prdebug_full_state() + *     sock_tag_list_lock + *       (sock_tag_tree) + *     uid_tag_data_tree_lock + *       (uid_tag_data_tree) + *       (proc_qtu_data_tree) + *     iface_stat_list_lock + * + * qtaguid_stats_proc_read() + *   iface_stat_list_lock + *     struct iface_stat->tag_stat_list_lock + * + * qtudev_open() + *   uid_tag_data_tree_lock + * + * qtudev_release() + *   sock_tag_list_lock + *     uid_tag_data_tree_lock + *   prdebug_full_state() + *     sock_tag_list_lock + *     uid_tag_data_tree_lock + *     iface_stat_list_lock + * + * iface_netdev_event_handler() + *   iface_stat_create() + *     iface_stat_list_lock + *   iface_stat_update() + *     iface_stat_list_lock + * + * iface_inetaddr_event_handler() + *   iface_stat_create() + *     iface_stat_list_lock + *   iface_stat_update() + *     iface_stat_list_lock + * + * iface_inet6addr_event_handler() + *   iface_stat_create_ipv6() + *     iface_stat_list_lock + *   iface_stat_update() + *     iface_stat_list_lock + * + * qtaguid_mt() + *   account_for_uid() + *     if_tag_stat_update() + *       get_sock_stat() + *         sock_tag_list_lock + *       struct iface_stat->tag_stat_list_lock 
+ *         tag_stat_update() + *           get_active_counter_set() + *             tag_counter_set_list_lock + *         tag_stat_update() + *           get_active_counter_set() + *             tag_counter_set_list_lock + * + * + * qtaguid_ctrl_parse() + *   ctrl_cmd_delete() + *     sock_tag_list_lock + *     tag_counter_set_list_lock + *     iface_stat_list_lock + *       struct iface_stat->tag_stat_list_lock + *     uid_tag_data_tree_lock + *   ctrl_cmd_counter_set() + *     tag_counter_set_list_lock + *   ctrl_cmd_tag() + *     sock_tag_list_lock + *       (sock_tag_tree) + *       get_tag_ref() + *         uid_tag_data_tree_lock + *           (uid_tag_data_tree) + *       uid_tag_data_tree_lock + *         (proc_qtu_data_tree) + *   ctrl_cmd_untag() + *     sock_tag_list_lock + *     uid_tag_data_tree_lock + * + */ +static LIST_HEAD(iface_stat_list); +static DEFINE_SPINLOCK(iface_stat_list_lock); + +static struct rb_root sock_tag_tree = RB_ROOT; +static DEFINE_SPINLOCK(sock_tag_list_lock); + +static struct rb_root tag_counter_set_tree = RB_ROOT; +static DEFINE_SPINLOCK(tag_counter_set_list_lock); + +static struct rb_root uid_tag_data_tree = RB_ROOT; +static DEFINE_SPINLOCK(uid_tag_data_tree_lock); + +static struct rb_root proc_qtu_data_tree = RB_ROOT; +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ + +static struct qtaguid_event_counts qtu_events; +/*----------------------------------------------*/ +static bool can_manipulate_uids(void) +{ +	/* root pwnd */ +	return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) +		|| in_egroup_p(proc_ctrl_write_gid); +} + +static bool can_impersonate_uid(uid_t uid) +{ +	return uid == current_fsuid() || can_manipulate_uids(); +} + +static bool can_read_other_uid_stats(uid_t uid) +{ +	/* root pwnd */ +	return unlikely(!current_fsuid()) || uid == current_fsuid() +		|| unlikely(!proc_stats_readall_gid) +		|| in_egroup_p(proc_stats_readall_gid); +} + +static inline void dc_add_byte_packets(struct 
data_counters *counters, int set, +				  enum ifs_tx_rx direction, +				  enum ifs_proto ifs_proto, +				  int bytes, +				  int packets) +{ +	counters->bpc[set][direction][ifs_proto].bytes += bytes; +	counters->bpc[set][direction][ifs_proto].packets += packets; +} + +static inline uint64_t dc_sum_bytes(struct data_counters *counters, +				    int set, +				    enum ifs_tx_rx direction) +{ +	return counters->bpc[set][direction][IFS_TCP].bytes +		+ counters->bpc[set][direction][IFS_UDP].bytes +		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, +				      int set, +				      enum ifs_tx_rx direction) +{ +	return counters->bpc[set][direction][IFS_TCP].packets +		+ counters->bpc[set][direction][IFS_UDP].packets +		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) +{ +	struct rb_node *node = root->rb_node; + +	while (node) { +		struct tag_node *data = rb_entry(node, struct tag_node, node); +		int result; +		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " +			 " node=%p data=%p\n", tag, node, data); +		result = tag_compare(tag, data->tag); +		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " +			 " data.tag=0x%llx (uid=%u) res=%d\n", +			 tag, data->tag, get_uid_from_tag(data->tag), result); +		if (result < 0) +			node = node->rb_left; +		else if (result > 0) +			node = node->rb_right; +		else +			return data; +	} +	return NULL; +} + +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) +{ +	struct rb_node **new = &(root->rb_node), *parent = NULL; + +	/* Figure out where to put new node */ +	while (*new) { +		struct tag_node *this = rb_entry(*new, struct tag_node, +						 node); +		int result = tag_compare(data->tag, this->tag); +		RB_DEBUG("qtaguid: %s(): tag=0x%llx" +			 " (uid=%u)\n", __func__, +			 this->tag, +			 get_uid_from_tag(this->tag)); +		parent = *new; +		if 
(result < 0) +			new = &((*new)->rb_left); +		else if (result > 0) +			new = &((*new)->rb_right); +		else +			BUG(); +	} + +	/* Add new node and rebalance tree. */ +	rb_link_node(&data->node, parent, new); +	rb_insert_color(&data->node, root); +} + +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +{ +	tag_node_tree_insert(&data->tn, root); +} + +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +{ +	struct tag_node *node = tag_node_tree_search(root, tag); +	if (!node) +		return NULL; +	return rb_entry(&node->node, struct tag_stat, tn.node); +} + +static void tag_counter_set_tree_insert(struct tag_counter_set *data, +					struct rb_root *root) +{ +	tag_node_tree_insert(&data->tn, root); +} + +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, +							   tag_t tag) +{ +	struct tag_node *node = tag_node_tree_search(root, tag); +	if (!node) +		return NULL; +	return rb_entry(&node->node, struct tag_counter_set, tn.node); + +} + +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) +{ +	tag_node_tree_insert(&data->tn, root); +} + +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) +{ +	struct tag_node *node = tag_node_tree_search(root, tag); +	if (!node) +		return NULL; +	return rb_entry(&node->node, struct tag_ref, tn.node); +} + +static struct sock_tag *sock_tag_tree_search(struct rb_root *root, +					     const struct sock *sk) +{ +	struct rb_node *node = root->rb_node; + +	while (node) { +		struct sock_tag *data = rb_entry(node, struct sock_tag, +						 sock_node); +		if (sk < data->sk) +			node = node->rb_left; +		else if (sk > data->sk) +			node = node->rb_right; +		else +			return data; +	} +	return NULL; +} + +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) +{ +	struct rb_node **new = &(root->rb_node), *parent = NULL; + +	/* Figure out where to put new node */ +	while (*new) { +		struct sock_tag 
*this = rb_entry(*new, struct sock_tag, +						 sock_node); +		parent = *new; +		if (data->sk < this->sk) +			new = &((*new)->rb_left); +		else if (data->sk > this->sk) +			new = &((*new)->rb_right); +		else +			BUG(); +	} + +	/* Add new node and rebalance tree. */ +	rb_link_node(&data->sock_node, parent, new); +	rb_insert_color(&data->sock_node, root); +} + +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) +{ +	struct rb_node *node; +	struct sock_tag *st_entry; + +	node = rb_first(st_to_free_tree); +	while (node) { +		st_entry = rb_entry(node, struct sock_tag, sock_node); +		node = rb_next(node); +		CT_DEBUG("qtaguid: %s(): " +			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, +			 st_entry->sk, +			 st_entry->tag, +			 get_uid_from_tag(st_entry->tag)); +		rb_erase(&st_entry->sock_node, st_to_free_tree); +		sockfd_put(st_entry->socket); +		kfree(st_entry); +	} +} + +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, +						       const pid_t pid) +{ +	struct rb_node *node = root->rb_node; + +	while (node) { +		struct proc_qtu_data *data = rb_entry(node, +						      struct proc_qtu_data, +						      node); +		if (pid < data->pid) +			node = node->rb_left; +		else if (pid > data->pid) +			node = node->rb_right; +		else +			return data; +	} +	return NULL; +} + +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, +				      struct rb_root *root) +{ +	struct rb_node **new = &(root->rb_node), *parent = NULL; + +	/* Figure out where to put new node */ +	while (*new) { +		struct proc_qtu_data *this = rb_entry(*new, +						      struct proc_qtu_data, +						      node); +		parent = *new; +		if (data->pid < this->pid) +			new = &((*new)->rb_left); +		else if (data->pid > this->pid) +			new = &((*new)->rb_right); +		else +			BUG(); +	} + +	/* Add new node and rebalance tree. 
*/ +	rb_link_node(&data->node, parent, new); +	rb_insert_color(&data->node, root); +} + +static void uid_tag_data_tree_insert(struct uid_tag_data *data, +				     struct rb_root *root) +{ +	struct rb_node **new = &(root->rb_node), *parent = NULL; + +	/* Figure out where to put new node */ +	while (*new) { +		struct uid_tag_data *this = rb_entry(*new, +						     struct uid_tag_data, +						     node); +		parent = *new; +		if (data->uid < this->uid) +			new = &((*new)->rb_left); +		else if (data->uid > this->uid) +			new = &((*new)->rb_right); +		else +			BUG(); +	} + +	/* Add new node and rebalance tree. */ +	rb_link_node(&data->node, parent, new); +	rb_insert_color(&data->node, root); +} + +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, +						     uid_t uid) +{ +	struct rb_node *node = root->rb_node; + +	while (node) { +		struct uid_tag_data *data = rb_entry(node, +						     struct uid_tag_data, +						     node); +		if (uid < data->uid) +			node = node->rb_left; +		else if (uid > data->uid) +			node = node->rb_right; +		else +			return data; +	} +	return NULL; +} + +/* + * Allocates a new uid_tag_data struct if needed. + * Returns a pointer to the found or allocated uid_tag_data. + * Returns a PTR_ERR on failures, and lock is not held. + * If found is not NULL: + *   sets *found to true if not allocated. + *   sets *found to false if allocated. 
+ */ +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) +{ +	struct uid_tag_data *utd_entry; + +	/* Look for top level uid_tag_data for the UID */ +	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); +	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); + +	if (found_res) +		*found_res = utd_entry; +	if (utd_entry) +		return utd_entry; + +	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); +	if (!utd_entry) { +		pr_err("qtaguid: get_uid_data(%u): " +		       "tag data alloc failed\n", uid); +		return ERR_PTR(-ENOMEM); +	} + +	utd_entry->uid = uid; +	utd_entry->tag_ref_tree = RB_ROOT; +	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); +	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); +	return utd_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *new_tag_ref(tag_t new_tag, +				   struct uid_tag_data *utd_entry) +{ +	struct tag_ref *tr_entry; +	int res; + +	if (utd_entry->num_active_tags + 1 > max_sock_tags) { +		pr_info("qtaguid: new_tag_ref(0x%llx): " +			"tag ref alloc quota exceeded. 
max=%d\n", +			new_tag, max_sock_tags); +		res = -EMFILE; +		goto err_res; + +	} + +	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); +	if (!tr_entry) { +		pr_err("qtaguid: new_tag_ref(0x%llx): " +		       "tag ref alloc failed\n", +		       new_tag); +		res = -ENOMEM; +		goto err_res; +	} +	tr_entry->tn.tag = new_tag; +	/* tr_entry->num_sock_tags  handled by caller */ +	utd_entry->num_active_tags++; +	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); +	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " +		 " inserted new tag ref %p\n", +		 new_tag, tr_entry); +	return tr_entry; + +err_res: +	return ERR_PTR(res); +} + +static struct tag_ref *lookup_tag_ref(tag_t full_tag, +				      struct uid_tag_data **utd_res) +{ +	struct uid_tag_data *utd_entry; +	struct tag_ref *tr_entry; +	bool found_utd; +	uid_t uid = get_uid_from_tag(full_tag); + +	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", +		 full_tag, uid); + +	utd_entry = get_uid_data(uid, &found_utd); +	if (IS_ERR_OR_NULL(utd_entry)) { +		if (utd_res) +			*utd_res = utd_entry; +		return NULL; +	} + +	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); +	if (utd_res) +		*utd_res = utd_entry; +	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", +		 full_tag, utd_entry, tr_entry); +	return tr_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. 
*/ +static struct tag_ref *get_tag_ref(tag_t full_tag, +				   struct uid_tag_data **utd_res) +{ +	struct uid_tag_data *utd_entry; +	struct tag_ref *tr_entry; + +	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", +		 full_tag); +	spin_lock_bh(&uid_tag_data_tree_lock); +	tr_entry = lookup_tag_ref(full_tag, &utd_entry); +	BUG_ON(IS_ERR_OR_NULL(utd_entry)); +	if (!tr_entry) +		tr_entry = new_tag_ref(full_tag, utd_entry); + +	spin_unlock_bh(&uid_tag_data_tree_lock); +	if (utd_res) +		*utd_res = utd_entry; +	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", +		 full_tag, utd_entry, tr_entry); +	return tr_entry; +} + +/* Checks and maybe frees the UID Tag Data entry */ +static void put_utd_entry(struct uid_tag_data *utd_entry) +{ +	/* Are we done with the UID tag data entry? */ +	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && +		!utd_entry->num_pqd) { +		DR_DEBUG("qtaguid: %s(): " +			 "erase utd_entry=%p uid=%u " +			 "by pid=%u tgid=%u uid=%u\n", __func__, +			 utd_entry, utd_entry->uid, +			 current->pid, current->tgid, current_fsuid()); +		BUG_ON(utd_entry->num_active_tags); +		rb_erase(&utd_entry->node, &uid_tag_data_tree); +		kfree(utd_entry); +	} else { +		DR_DEBUG("qtaguid: %s(): " +			 "utd_entry=%p still has %d tags %d proc_qtu_data\n", +			 __func__, utd_entry, utd_entry->num_active_tags, +			 utd_entry->num_pqd); +		BUG_ON(!(utd_entry->num_active_tags || +			 utd_entry->num_pqd)); +	} +} + +/* + * If no sock_tags are using this tag_ref, + * decrements refcount of utd_entry, removes tr_entry + * from utd_entry->tag_ref_tree and frees. 
+ */ +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, +					struct uid_tag_data *utd_entry) +{ +	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, +		 tr_entry, tr_entry->tn.tag, +		 get_uid_from_tag(tr_entry->tn.tag)); +	if (!tr_entry->num_sock_tags) { +		BUG_ON(!utd_entry->num_active_tags); +		utd_entry->num_active_tags--; +		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); +		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); +		kfree(tr_entry); +	} +} + +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) +{ +	struct rb_node *node; +	struct tag_ref *tr_entry; +	tag_t acct_tag; + +	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, +		 full_tag, get_uid_from_tag(full_tag)); +	acct_tag = get_atag_from_tag(full_tag); +	node = rb_first(&utd_entry->tag_ref_tree); +	while (node) { +		tr_entry = rb_entry(node, struct tag_ref, tn.node); +		node = rb_next(node); +		if (!acct_tag || tr_entry->tn.tag == full_tag) +			free_tag_ref_from_utd_entry(tr_entry, utd_entry); +	} +} + +static int read_proc_u64(char *page, char **start, off_t off, +			int count, int *eof, void *data) +{ +	int len; +	uint64_t value; +	char *p = page; +	uint64_t *iface_entry = data; + +	if (!data) +		return 0; + +	value = *iface_entry; +	p += sprintf(p, "%llu\n", value); +	len = (p - page) - off; +	*eof = (len <= count) ? 1 : 0; +	*start = page + off; +	return len; +} + +static int read_proc_bool(char *page, char **start, off_t off, +			int count, int *eof, void *data) +{ +	int len; +	bool value; +	char *p = page; +	bool *bool_entry = data; + +	if (!data) +		return 0; + +	value = *bool_entry; +	p += sprintf(p, "%u\n", value); +	len = (p - page) - off; +	*eof = (len <= count) ? 
1 : 0; +	*start = page + off; +	return len; +} + +static int get_active_counter_set(tag_t tag) +{ +	int active_set = 0; +	struct tag_counter_set *tcs; + +	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" +		 " (uid=%u)\n", +		 tag, get_uid_from_tag(tag)); +	/* For now we only handle UID tags for active sets */ +	tag = get_utag_from_tag(tag); +	spin_lock_bh(&tag_counter_set_list_lock); +	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (tcs) +		active_set = tcs->active_set; +	spin_unlock_bh(&tag_counter_set_list_lock); +	return active_set; +} + +/* + * Find the entry for tracking the specified interface. + * Caller must hold iface_stat_list_lock + */ +static struct iface_stat *get_iface_entry(const char *ifname) +{ +	struct iface_stat *iface_entry; + +	/* Find the entry for tracking the specified tag within the interface */ +	if (ifname == NULL) { +		pr_info("qtaguid: iface_stat: get() NULL device name\n"); +		return NULL; +	} + +	/* Iterate over interfaces */ +	list_for_each_entry(iface_entry, &iface_stat_list, list) { +		if (!strcmp(ifname, iface_entry->ifname)) +			goto done; +	} +	iface_entry = NULL; +done: +	return iface_entry; +} + +static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, +				    off_t items_to_skip, int char_count, +				    int *eof, void *data) +{ +	char *outp = page; +	int item_index = 0; +	int len; +	int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */ +	struct iface_stat *iface_entry; +	struct rtnl_link_stats64 dev_stats, *stats; +	struct rtnl_link_stats64 no_dev_stats = {0}; + +	if (unlikely(module_passive)) { +		*eof = 1; +		return 0; +	} + +	CT_DEBUG("qtaguid:proc iface_stat_fmt " +		 "pid=%u tgid=%u uid=%u " +		 "page=%p *num_items_returned=%p off=%ld " +		 "char_count=%d *eof=%d\n", +		 current->pid, current->tgid, current_fsuid(), +		 page, *num_items_returned, +		 items_to_skip, char_count, *eof); + +	if (*eof) +		return 0; + +	if (fmt == 2 && item_index++ >= items_to_skip) 
{ +		len = snprintf(outp, char_count, +			       "ifname " +			       "total_skb_rx_bytes total_skb_rx_packets " +			       "total_skb_tx_bytes total_skb_tx_packets\n" +			); +		if (len >= char_count) { +			*outp = '\0'; +			return outp - page; +		} +		outp += len; +		char_count -= len; +		(*num_items_returned)++; +	} + +	/* +	 * This lock will prevent iface_stat_update() from changing active, +	 * and in turn prevent an interface from unregistering itself. +	 */ +	spin_lock_bh(&iface_stat_list_lock); +	list_for_each_entry(iface_entry, &iface_stat_list, list) { +		if (item_index++ < items_to_skip) +			continue; + +		if (iface_entry->active) { +			stats = dev_get_stats(iface_entry->net_dev, +					      &dev_stats); +		} else { +			stats = &no_dev_stats; +		} +		/* +		 * If the meaning of the data changes, then update the fmtX +		 * string. +		 */ +		if (fmt == 1) { +			len = snprintf( +				outp, char_count, +				"%s %d " +				"%llu %llu %llu %llu " +				"%llu %llu %llu %llu\n", +				iface_entry->ifname, +				iface_entry->active, +				iface_entry->totals_via_dev[IFS_RX].bytes, +				iface_entry->totals_via_dev[IFS_RX].packets, +				iface_entry->totals_via_dev[IFS_TX].bytes, +				iface_entry->totals_via_dev[IFS_TX].packets, +				stats->rx_bytes, stats->rx_packets, +				stats->tx_bytes, stats->tx_packets +				); +		} else { +			len = snprintf( +				outp, char_count, +				"%s " +				"%llu %llu %llu %llu\n", +				iface_entry->ifname, +				iface_entry->totals_via_skb[IFS_RX].bytes, +				iface_entry->totals_via_skb[IFS_RX].packets, +				iface_entry->totals_via_skb[IFS_TX].bytes, +				iface_entry->totals_via_skb[IFS_TX].packets +				); +		} +		if (len >= char_count) { +			spin_unlock_bh(&iface_stat_list_lock); +			*outp = '\0'; +			return outp - page; +		} +		outp += len; +		char_count -= len; +		(*num_items_returned)++; +	} +	spin_unlock_bh(&iface_stat_list_lock); + +	*eof = 1; +	return outp - page; +} + +static void iface_create_proc_worker(struct work_struct *work) +{ +	
struct proc_dir_entry *proc_entry; +	struct iface_stat_work *isw = container_of(work, struct iface_stat_work, +						   iface_work); +	struct iface_stat *new_iface  = isw->iface_entry; + +	/* iface_entries are not deleted, so safe to manipulate. */ +	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); +	if (IS_ERR_OR_NULL(proc_entry)) { +		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); +		kfree(isw); +		return; +	} + +	new_iface->proc_ptr = proc_entry; + +	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, +			       read_proc_u64, +			       &new_iface->totals_via_dev[IFS_TX].bytes); +	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, +			       read_proc_u64, +			       &new_iface->totals_via_dev[IFS_RX].bytes); +	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, +			       read_proc_u64, +			       &new_iface->totals_via_dev[IFS_TX].packets); +	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, +			       read_proc_u64, +			       &new_iface->totals_via_dev[IFS_RX].packets); +	create_proc_read_entry("active", proc_iface_perms, proc_entry, +			read_proc_bool, &new_iface->active); + +	IF_DEBUG("qtaguid: iface_stat: create_proc(): done " +		 "entry=%p dev=%s\n", new_iface, new_iface->ifname); +	kfree(isw); +} + +/* + * Will set the entry's active state, and + * update the net_dev accordingly also. + */ +static void _iface_stat_set_active(struct iface_stat *entry, +				   struct net_device *net_dev, +				   bool activate) +{ +	if (activate) { +		entry->net_dev = net_dev; +		entry->active = true; +		IF_DEBUG("qtaguid: %s(%s): " +			 "enable tracking. rfcnt=%d\n", __func__, +			 entry->ifname, +			 percpu_read(*net_dev->pcpu_refcnt)); +	} else { +		entry->active = false; +		entry->net_dev = NULL; +		IF_DEBUG("qtaguid: %s(%s): " +			 "disable tracking. 
rfcnt=%d\n", __func__, +			 entry->ifname, +			 percpu_read(*net_dev->pcpu_refcnt)); + +	} +} + +/* Caller must hold iface_stat_list_lock */ +static struct iface_stat *iface_alloc(struct net_device *net_dev) +{ +	struct iface_stat *new_iface; +	struct iface_stat_work *isw; + +	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); +	if (new_iface == NULL) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "iface_stat alloc failed\n", net_dev->name); +		return NULL; +	} +	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); +	if (new_iface->ifname == NULL) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "ifname alloc failed\n", net_dev->name); +		kfree(new_iface); +		return NULL; +	} +	spin_lock_init(&new_iface->tag_stat_list_lock); +	new_iface->tag_stat_tree = RB_ROOT; +	_iface_stat_set_active(new_iface, net_dev, true); + +	/* +	 * ipv6 notifier chains are atomic :( +	 * No create_proc_read_entry() for you! +	 */ +	isw = kmalloc(sizeof(*isw), GFP_ATOMIC); +	if (!isw) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "work alloc failed\n", new_iface->ifname); +		_iface_stat_set_active(new_iface, net_dev, false); +		kfree(new_iface->ifname); +		kfree(new_iface); +		return NULL; +	} +	isw->iface_entry = new_iface; +	INIT_WORK(&isw->iface_work, iface_create_proc_worker); +	schedule_work(&isw->iface_work); +	list_add(&new_iface->list, &iface_stat_list); +	return new_iface; +} + +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, +					       struct iface_stat *iface) +{ +	struct rtnl_link_stats64 dev_stats, *stats; +	bool stats_rewound; + +	stats = dev_get_stats(net_dev, &dev_stats); +	/* No empty packets */ +	stats_rewound = +		(stats->rx_bytes < iface->last_known[IFS_RX].bytes) +		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes); + +	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " +		 "bytes rx/tx=%llu/%llu " +		 "active=%d last_known=%d " +		 "stats_rewound=%d\n", __func__, +		 net_dev ? 
net_dev->name : "?", +		 iface, net_dev, +		 stats->rx_bytes, stats->tx_bytes, +		 iface->active, iface->last_known_valid, stats_rewound); + +	if (iface->active && iface->last_known_valid && stats_rewound) { +		pr_warn_once("qtaguid: iface_stat: %s(%s): " +			     "iface reset its stats unexpectedly\n", __func__, +			     net_dev->name); + +		iface->totals_via_dev[IFS_TX].bytes += +			iface->last_known[IFS_TX].bytes; +		iface->totals_via_dev[IFS_TX].packets += +			iface->last_known[IFS_TX].packets; +		iface->totals_via_dev[IFS_RX].bytes += +			iface->last_known[IFS_RX].bytes; +		iface->totals_via_dev[IFS_RX].packets += +			iface->last_known[IFS_RX].packets; +		iface->last_known_valid = false; +		IF_DEBUG("qtaguid: %s(%s): iface=%p " +			 "used last known bytes rx/tx=%llu/%llu\n", __func__, +			 iface->ifname, iface, iface->last_known[IFS_RX].bytes, +			 iface->last_known[IFS_TX].bytes); +	} +} + +/* + * Create a new entry for tracking the specified interface. + * Do nothing if the entry already exists. + * Called when an interface is configured with a valid IP address. + */ +static void iface_stat_create(struct net_device *net_dev, +			      struct in_ifaddr *ifa) +{ +	struct in_device *in_dev = NULL; +	const char *ifname; +	struct iface_stat *entry; +	__be32 ipaddr = 0; +	struct iface_stat *new_iface; + +	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", +		 net_dev ? net_dev->name : "?", +		 ifa, net_dev); +	if (!net_dev) { +		pr_err("qtaguid: iface_stat: create(): no net dev\n"); +		return; +	} + +	ifname = net_dev->name; +	if (!ifa) { +		in_dev = in_dev_get(net_dev); +		if (!in_dev) { +			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", +			       ifname); +			return; +		} +		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", +			 ifname, in_dev); +		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { +			IF_DEBUG("qtaguid: iface_stat: create(%s): " +				 "ifa=%p ifa_label=%s\n", +				 ifname, ifa, +				 ifa->ifa_label ? 
ifa->ifa_label : "(null)"); +			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) +				break; +		} +	} + +	if (!ifa) { +		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", +			 ifname); +		goto done_put; +	} +	ipaddr = ifa->ifa_local; + +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(ifname); +	if (entry != NULL) { +		bool activate = !ipv4_is_loopback(ipaddr); +		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", +			 ifname, entry); +		iface_check_stats_reset_and_adjust(net_dev, entry); +		_iface_stat_set_active(entry, net_dev, activate); +		IF_DEBUG("qtaguid: %s(%s): " +			 "tracking now %d on ip=%pI4\n", __func__, +			 entry->ifname, activate, &ipaddr); +		goto done_unlock_put; +	} else if (ipv4_is_loopback(ipaddr)) { +		IF_DEBUG("qtaguid: iface_stat: create(%s): " +			 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); +		goto done_unlock_put; +	} + +	new_iface = iface_alloc(net_dev); +	IF_DEBUG("qtaguid: iface_stat: create(%s): done " +		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); +done_unlock_put: +	spin_unlock_bh(&iface_stat_list_lock); +done_put: +	if (in_dev) +		in_dev_put(in_dev); +} + +static void iface_stat_create_ipv6(struct net_device *net_dev, +				   struct inet6_ifaddr *ifa) +{ +	struct in_device *in_dev; +	const char *ifname; +	struct iface_stat *entry; +	struct iface_stat *new_iface; +	int addr_type; + +	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", +		 ifa, net_dev, net_dev ? 
net_dev->name : ""); +	if (!net_dev) { +		pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); +		return; +	} +	ifname = net_dev->name; + +	in_dev = in_dev_get(net_dev); +	if (!in_dev) { +		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", +		       ifname); +		return; +	} + +	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", +		 ifname, in_dev); + +	if (!ifa) { +		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", +			 ifname); +		goto done_put; +	} +	addr_type = ipv6_addr_type(&ifa->addr); + +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(ifname); +	if (entry != NULL) { +		bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); +		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, +			 ifname, entry); +		iface_check_stats_reset_and_adjust(net_dev, entry); +		_iface_stat_set_active(entry, net_dev, activate); +		IF_DEBUG("qtaguid: %s(%s): " +			 "tracking now %d on ip=%pI6c\n", __func__, +			 entry->ifname, activate, &ifa->addr); +		goto done_unlock_put; +	} else if (addr_type & IPV6_ADDR_LOOPBACK) { +		IF_DEBUG("qtaguid: %s(%s): " +			 "ignore loopback dev. 
ip=%pI6c\n", __func__, +			 ifname, &ifa->addr); +		goto done_unlock_put; +	} + +	new_iface = iface_alloc(net_dev); +	IF_DEBUG("qtaguid: iface_stat: create6(%s): done " +		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); + +done_unlock_put: +	spin_unlock_bh(&iface_stat_list_lock); +done_put: +	in_dev_put(in_dev); +} + +static struct sock_tag *get_sock_stat_nl(const struct sock *sk) +{ +	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); +	return sock_tag_tree_search(&sock_tag_tree, sk); +} + +static struct sock_tag *get_sock_stat(const struct sock *sk) +{ +	struct sock_tag *sock_tag_entry; +	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); +	if (!sk) +		return NULL; +	spin_lock_bh(&sock_tag_list_lock); +	sock_tag_entry = get_sock_stat_nl(sk); +	spin_unlock_bh(&sock_tag_list_lock); +	return sock_tag_entry; +} + +static int ipx_proto(const struct sk_buff *skb, +		     struct xt_action_param *par) +{ +	int thoff, tproto; + +	switch (par->family) { +	case NFPROTO_IPV6: +		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); +		if (tproto < 0) +			MT_DEBUG("%s(): transport header not found in ipv6" +				 " skb=%p\n", __func__, skb); +		break; +	case NFPROTO_IPV4: +		tproto = ip_hdr(skb)->protocol; +		break; +	default: +		tproto = IPPROTO_RAW; +	} +	return tproto; +} + +static void +data_counters_update(struct data_counters *dc, int set, +		     enum ifs_tx_rx direction, int proto, int bytes) +{ +	switch (proto) { +	case IPPROTO_TCP: +		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); +		break; +	case IPPROTO_UDP: +		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); +		break; +	case IPPROTO_IP: +	default: +		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, +				    1); +		break; +	} +} + +/* + * Update stats for the specified interface. Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called when an device is being unregistered. 
+ */ +static void iface_stat_update(struct net_device *net_dev, bool stash_only) +{ +	struct rtnl_link_stats64 dev_stats, *stats; +	struct iface_stat *entry; + +	stats = dev_get_stats(net_dev, &dev_stats); +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(net_dev->name); +	if (entry == NULL) { +		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", +			 net_dev->name); +		spin_unlock_bh(&iface_stat_list_lock); +		return; +	} + +	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, +		 net_dev->name, entry); +	if (!entry->active) { +		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, +			 net_dev->name); +		spin_unlock_bh(&iface_stat_list_lock); +		return; +	} + +	if (stash_only) { +		entry->last_known[IFS_TX].bytes = stats->tx_bytes; +		entry->last_known[IFS_TX].packets = stats->tx_packets; +		entry->last_known[IFS_RX].bytes = stats->rx_bytes; +		entry->last_known[IFS_RX].packets = stats->rx_packets; +		entry->last_known_valid = true; +		IF_DEBUG("qtaguid: %s(%s): " +			 "dev stats stashed rx/tx=%llu/%llu\n", __func__, +			 net_dev->name, stats->rx_bytes, stats->tx_bytes); +		spin_unlock_bh(&iface_stat_list_lock); +		return; +	} +	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes; +	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets; +	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes; +	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets; +	/* We don't need the last_known[] anymore */ +	entry->last_known_valid = false; +	_iface_stat_set_active(entry, net_dev, false); +	IF_DEBUG("qtaguid: %s(%s): " +		 "disable tracking. rx/tx=%llu/%llu\n", __func__, +		 net_dev->name, stats->rx_bytes, stats->tx_bytes); +	spin_unlock_bh(&iface_stat_list_lock); +} + +/* + * Update stats for the specified interface from the skb. + * Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called on each sk. 
+ */ +static void iface_stat_update_from_skb(const struct sk_buff *skb, +				       struct xt_action_param *par) +{ +	struct iface_stat *entry; +	const struct net_device *el_dev; +	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; +	int bytes = skb->len; + +	if (!skb->dev) { +		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); +		el_dev = par->in ? : par->out; +	} else { +		const struct net_device *other_dev; +		el_dev = skb->dev; +		other_dev = par->in ? : par->out; +		if (el_dev != other_dev) { +			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " +				 "par->(in/out)=%p %s\n", +				 par->hooknum, el_dev, el_dev->name, other_dev, +				 other_dev->name); +		} +	} + +	if (unlikely(!el_dev)) { +		pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", +		       par->hooknum, __func__); +		BUG(); +	} else if (unlikely(!el_dev->name)) { +		pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", +		       par->hooknum, __func__); +		BUG(); +	} else { +		int proto = ipx_proto(skb, par); +		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", +			 par->hooknum, el_dev->name, el_dev->type, +			 par->family, proto); +	} + +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(el_dev->name); +	if (entry == NULL) { +		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", +			 __func__, el_dev->name); +		spin_unlock_bh(&iface_stat_list_lock); +		return; +	} + +	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, +		 el_dev->name, entry); + +	entry->totals_via_skb[direction].bytes += bytes; +	entry->totals_via_skb[direction].packets++; +	spin_unlock_bh(&iface_stat_list_lock); +} + +static void tag_stat_update(struct tag_stat *tag_entry, +			enum ifs_tx_rx direction, int proto, int bytes) +{ +	int active_set; +	active_set = get_active_counter_set(tag_entry->tn.tag); +	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " +		 "dir=%d proto=%d bytes=%d)\n", +		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), +		 active_set, direction, proto, bytes); +	
data_counters_update(&tag_entry->counters, active_set, direction, +			     proto, bytes); +	if (tag_entry->parent_counters) +		data_counters_update(tag_entry->parent_counters, active_set, +				     direction, proto, bytes); +} + +/* + * Create a new entry for tracking the specified {acct_tag,uid_tag} within + * the interface. + * iface_entry->tag_stat_list_lock should be held. + */ +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, +					   tag_t tag) +{ +	struct tag_stat *new_tag_stat_entry = NULL; +	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" +		 " (uid=%u)\n", __func__, +		 iface_entry, tag, get_uid_from_tag(tag)); +	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); +	if (!new_tag_stat_entry) { +		pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); +		goto done; +	} +	new_tag_stat_entry->tn.tag = tag; +	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); +done: +	return new_tag_stat_entry; +} + +static void if_tag_stat_update(const char *ifname, uid_t uid, +			       const struct sock *sk, enum ifs_tx_rx direction, +			       int proto, int bytes) +{ +	struct tag_stat *tag_stat_entry; +	tag_t tag, acct_tag; +	tag_t uid_tag; +	struct data_counters *uid_tag_counters; +	struct sock_tag *sock_tag_entry; +	struct iface_stat *iface_entry; +	struct tag_stat *new_tag_stat = NULL; +	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " +		"uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", +		 ifname, uid, sk, direction, proto, bytes); + + +	iface_entry = get_iface_entry(ifname); +	if (!iface_entry) { +		pr_err("qtaguid: iface_stat: stat_update() %s not found\n", +		       ifname); +		return; +	} +	/* It is ok to process data when an iface_entry is inactive */ + +	MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", +		 ifname, iface_entry); + +	/* +	 * Look for a tagged sock. +	 * It will have an acct_uid. 
+	 */ +	sock_tag_entry = get_sock_stat(sk); +	if (sock_tag_entry) { +		tag = sock_tag_entry->tag; +		acct_tag = get_atag_from_tag(tag); +		uid_tag = get_utag_from_tag(tag); +	} else { +		acct_tag = make_atag_from_value(0); +		tag = combine_atag_with_uid(acct_tag, uid); +		uid_tag = make_tag_from_uid(uid); +	} +	MT_DEBUG("qtaguid: iface_stat: stat_update(): " +		 " looking for tag=0x%llx (uid=%u) in ife=%p\n", +		 tag, get_uid_from_tag(tag), iface_entry); +	/* Loop over tag list under this interface for {acct_tag,uid_tag} */ +	spin_lock_bh(&iface_entry->tag_stat_list_lock); + +	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, +					      tag); +	if (tag_stat_entry) { +		/* +		 * Updating the {acct_tag, uid_tag} entry handles both stats: +		 * {0, uid_tag} will also get updated. +		 */ +		tag_stat_update(tag_stat_entry, direction, proto, bytes); +		spin_unlock_bh(&iface_entry->tag_stat_list_lock); +		return; +	} + +	/* Loop over tag list under this interface for {0,uid_tag} */ +	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, +					      uid_tag); +	if (!tag_stat_entry) { +		/* Here: the base uid_tag did not exist */ +		/* +		 * No parent counters. So +		 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. +		 */ +		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); +		uid_tag_counters = &new_tag_stat->counters; +	} else { +		uid_tag_counters = &tag_stat_entry->counters; +	} + +	if (acct_tag) { +		/* Create the child {acct_tag, uid_tag} and hook up parent. */ +		new_tag_stat = create_if_tag_stat(iface_entry, tag); +		new_tag_stat->parent_counters = uid_tag_counters; +	} else { +		/* +		 * For new_tag_stat to be still NULL here would require: +		 *  {0, uid_tag} exists +		 *  and {acct_tag, uid_tag} doesn't exist +		 *  AND acct_tag == 0. +		 * Impossible. This reassures us that new_tag_stat +		 * below will always be assigned. 
+		 */ +		BUG_ON(!new_tag_stat); +	} +	tag_stat_update(new_tag_stat, direction, proto, bytes); +	spin_unlock_bh(&iface_entry->tag_stat_list_lock); +} + +static int iface_netdev_event_handler(struct notifier_block *nb, +				      unsigned long event, void *ptr) { +	struct net_device *dev = ptr; + +	if (unlikely(module_passive)) +		return NOTIFY_DONE; + +	IF_DEBUG("qtaguid: iface_stat: netdev_event(): " +		 "ev=0x%lx/%s netdev=%p->name=%s\n", +		 event, netdev_evt_str(event), dev, dev ? dev->name : ""); + +	switch (event) { +	case NETDEV_UP: +		iface_stat_create(dev, NULL); +		atomic64_inc(&qtu_events.iface_events); +		break; +	case NETDEV_DOWN: +	case NETDEV_UNREGISTER: +		iface_stat_update(dev, event == NETDEV_DOWN); +		atomic64_inc(&qtu_events.iface_events); +		break; +	} +	return NOTIFY_DONE; +} + +static int iface_inet6addr_event_handler(struct notifier_block *nb, +					 unsigned long event, void *ptr) +{ +	struct inet6_ifaddr *ifa = ptr; +	struct net_device *dev; + +	if (unlikely(module_passive)) +		return NOTIFY_DONE; + +	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " +		 "ev=0x%lx/%s ifa=%p\n", +		 event, netdev_evt_str(event), ifa); + +	switch (event) { +	case NETDEV_UP: +		BUG_ON(!ifa || !ifa->idev); +		dev = (struct net_device *)ifa->idev->dev; +		iface_stat_create_ipv6(dev, ifa); +		atomic64_inc(&qtu_events.iface_events); +		break; +	case NETDEV_DOWN: +	case NETDEV_UNREGISTER: +		BUG_ON(!ifa || !ifa->idev); +		dev = (struct net_device *)ifa->idev->dev; +		iface_stat_update(dev, event == NETDEV_DOWN); +		atomic64_inc(&qtu_events.iface_events); +		break; +	} +	return NOTIFY_DONE; +} + +static int iface_inetaddr_event_handler(struct notifier_block *nb, +					unsigned long event, void *ptr) +{ +	struct in_ifaddr *ifa = ptr; +	struct net_device *dev; + +	if (unlikely(module_passive)) +		return NOTIFY_DONE; + +	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " +		 "ev=0x%lx/%s ifa=%p\n", +		 event, netdev_evt_str(event), ifa); + +	switch (event) { +	case 
NETDEV_UP: +		BUG_ON(!ifa || !ifa->ifa_dev); +		dev = ifa->ifa_dev->dev; +		iface_stat_create(dev, ifa); +		atomic64_inc(&qtu_events.iface_events); +		break; +	case NETDEV_DOWN: +	case NETDEV_UNREGISTER: +		BUG_ON(!ifa || !ifa->ifa_dev); +		dev = ifa->ifa_dev->dev; +		iface_stat_update(dev, event == NETDEV_DOWN); +		atomic64_inc(&qtu_events.iface_events); +		break; +	} +	return NOTIFY_DONE; +} + +static struct notifier_block iface_netdev_notifier_blk = { +	.notifier_call = iface_netdev_event_handler, +}; + +static struct notifier_block iface_inetaddr_notifier_blk = { +	.notifier_call = iface_inetaddr_event_handler, +}; + +static struct notifier_block iface_inet6addr_notifier_blk = { +	.notifier_call = iface_inet6addr_event_handler, +}; + +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) +{ +	int err; + +	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); +	if (!iface_stat_procdir) { +		pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); +		err = -1; +		goto err; +	} + +	iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, +						    proc_iface_perms, +						    parent_procdir); +	if (!iface_stat_all_procfile) { +		pr_err("qtaguid: iface_stat: init " +		       " failed to create stat_old proc entry\n"); +		err = -1; +		goto err_zap_entry; +	} +	iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read; +	iface_stat_all_procfile->data = (void *)1; /* fmt1 */ + +	iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename, +						    proc_iface_perms, +						    parent_procdir); +	if (!iface_stat_fmt_procfile) { +		pr_err("qtaguid: iface_stat: init " +		       " failed to create stat_all proc entry\n"); +		err = -1; +		goto err_zap_all_stats_entry; +	} +	iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read; +	iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */ + + +	err = register_netdevice_notifier(&iface_netdev_notifier_blk); +	if (err) { +		pr_err("qtaguid: 
iface_stat: init " +		       "failed to register dev event handler\n"); +		goto err_zap_all_stats_entries; +	} +	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); +	if (err) { +		pr_err("qtaguid: iface_stat: init " +		       "failed to register ipv4 dev event handler\n"); +		goto err_unreg_nd; +	} + +	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); +	if (err) { +		pr_err("qtaguid: iface_stat: init " +		       "failed to register ipv6 dev event handler\n"); +		goto err_unreg_ip4_addr; +	} +	return 0; + +err_unreg_ip4_addr: +	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); +err_unreg_nd: +	unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entries: +	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir); +err_zap_all_stats_entry: +	remove_proc_entry(iface_stat_all_procfilename, parent_procdir); +err_zap_entry: +	remove_proc_entry(iface_stat_procdirname, parent_procdir); +err: +	return err; +} + +static struct sock *qtaguid_find_sk(const struct sk_buff *skb, +				    struct xt_action_param *par) +{ +	struct sock *sk; +	unsigned int hook_mask = (1 << par->hooknum); + +	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, +		 par->hooknum, par->family); + +	/* +	 * Let's not abuse the the xt_socket_get*_sk(), or else it will +	 * return garbage SKs. +	 */ +	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) +		return NULL; + +	switch (par->family) { +	case NFPROTO_IPV6: +		sk = xt_socket_get6_sk(skb, par); +		break; +	case NFPROTO_IPV4: +		sk = xt_socket_get4_sk(skb, par); +		break; +	default: +		return NULL; +	} + +	/* +	 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. 
+	 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 +	 * Not fixed in 3.0-r3 :( +	 */ +	if (sk) { +		MT_DEBUG("qtaguid: %p->sk_proto=%u " +			 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); +		if (sk->sk_state  == TCP_TIME_WAIT) { +			xt_socket_put_sk(sk); +			sk = NULL; +		} +	} +	return sk; +} + +static void account_for_uid(const struct sk_buff *skb, +			    const struct sock *alternate_sk, uid_t uid, +			    struct xt_action_param *par) +{ +	const struct net_device *el_dev; + +	if (!skb->dev) { +		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); +		el_dev = par->in ? : par->out; +	} else { +		const struct net_device *other_dev; +		el_dev = skb->dev; +		other_dev = par->in ? : par->out; +		if (el_dev != other_dev) { +			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " +				"par->(in/out)=%p %s\n", +				par->hooknum, el_dev, el_dev->name, other_dev, +				other_dev->name); +		} +	} + +	if (unlikely(!el_dev)) { +		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); +	} else if (unlikely(!el_dev->name)) { +		pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); +	} else { +		int proto = ipx_proto(skb, par); +		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", +			 par->hooknum, el_dev->name, el_dev->type, +			 par->family, proto); + +		if_tag_stat_update(el_dev->name, uid, +				skb->sk ? skb->sk : alternate_sk, +				par->in ? 
IFS_RX : IFS_TX, +				proto, skb->len); +	} +} + +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct xt_qtaguid_match_info *info = par->matchinfo; +	const struct file *filp; +	bool got_sock = false; +	struct sock *sk; +	uid_t sock_uid; +	bool res; + +	if (unlikely(module_passive)) +		return (info->match ^ info->invert) == 0; + +	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", +		 par->hooknum, skb, par->in, par->out, par->family); + +	atomic64_inc(&qtu_events.match_calls); +	if (skb == NULL) { +		res = (info->match ^ info->invert) == 0; +		goto ret_res; +	} + +	switch (par->hooknum) { +	case NF_INET_PRE_ROUTING: +	case NF_INET_POST_ROUTING: +		atomic64_inc(&qtu_events.match_calls_prepost); +		iface_stat_update_from_skb(skb, par); +		/* +		 * We are done in pre/post. The skb will get processed +		 * further alter. +		 */ +		res = (info->match ^ info->invert); +		goto ret_res; +		break; +	/* default: Fall through and do UID releated work */ +	} + +	sk = skb->sk; +	if (sk == NULL) { +		/* +		 * A missing sk->sk_socket happens when packets are in-flight +		 * and the matching socket is already closed and gone. +		 */ +		sk = qtaguid_find_sk(skb, par); +		/* +		 * If we got the socket from the find_sk(), we will need to put +		 * it back, as nf_tproxy_get_sock_v4() got it. +		 */ +		got_sock = sk; +		if (sk) +			atomic64_inc(&qtu_events.match_found_sk_in_ct); +		else +			atomic64_inc(&qtu_events.match_found_no_sk_in_ct); +	} else { +		atomic64_inc(&qtu_events.match_found_sk); +	} +	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n", +		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par)); +	if (sk != NULL) { +		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", +			par->hooknum, sk, sk->sk_socket, +			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); +		filp = sk->sk_socket ? sk->sk_socket->file : NULL; +		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", +			par->hooknum, filp ? 
filp->f_cred->fsuid : -1); +	} + +	if (sk == NULL || sk->sk_socket == NULL) { +		/* +		 * Here, the qtaguid_find_sk() using connection tracking +		 * couldn't find the owner, so for now we just count them +		 * against the system. +		 */ +		/* +		 * TODO: unhack how to force just accounting. +		 * For now we only do iface stats when the uid-owner is not +		 * requested. +		 */ +		if (!(info->match & XT_QTAGUID_UID)) +			account_for_uid(skb, sk, 0, par); +		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", +			par->hooknum, +			sk ? sk->sk_socket : NULL); +		res = (info->match ^ info->invert) == 0; +		atomic64_inc(&qtu_events.match_no_sk); +		goto put_sock_ret_res; +	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) { +		res = false; +		goto put_sock_ret_res; +	} +	filp = sk->sk_socket->file; +	if (filp == NULL) { +		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); +		account_for_uid(skb, sk, 0, par); +		res = ((info->match ^ info->invert) & +			(XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; +		atomic64_inc(&qtu_events.match_no_sk_file); +		goto put_sock_ret_res; +	} +	sock_uid = filp->f_cred->fsuid; +	/* +	 * TODO: unhack how to force just accounting. 
+	 * For now we only do iface stats when the uid-owner is not requested +	 */ +	if (!(info->match & XT_QTAGUID_UID)) +		account_for_uid(skb, sk, sock_uid, par); + +	/* +	 * The following two tests fail the match when: +	 *    id not in range AND no inverted condition requested +	 * or id     in range AND    inverted condition requested +	 * Thus (!a && b) || (a && !b) == a ^ b +	 */ +	if (info->match & XT_QTAGUID_UID) +		if ((filp->f_cred->fsuid >= info->uid_min && +		     filp->f_cred->fsuid <= info->uid_max) ^ +		    !(info->invert & XT_QTAGUID_UID)) { +			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", +				 par->hooknum); +			res = false; +			goto put_sock_ret_res; +		} +	if (info->match & XT_QTAGUID_GID) +		if ((filp->f_cred->fsgid >= info->gid_min && +				filp->f_cred->fsgid <= info->gid_max) ^ +			!(info->invert & XT_QTAGUID_GID)) { +			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", +				par->hooknum); +			res = false; +			goto put_sock_ret_res; +		} + +	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); +	res = true; + +put_sock_ret_res: +	if (got_sock) +		xt_socket_put_sk(sk); +ret_res: +	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); +	return res; +} + +#ifdef DDEBUG +/* This function is not in xt_qtaguid_print.c because of locks visibility */ +static void prdebug_full_state(int indent_level, const char *fmt, ...) 
+{ +	va_list args; +	char *fmt_buff; +	char *buff; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	fmt_buff = kasprintf(GFP_ATOMIC, +			     "qtaguid: %s(): %s {\n", __func__, fmt); +	BUG_ON(!fmt_buff); +	va_start(args, fmt); +	buff = kvasprintf(GFP_ATOMIC, +			  fmt_buff, args); +	BUG_ON(!buff); +	pr_debug("%s", buff); +	kfree(fmt_buff); +	kfree(buff); +	va_end(args); + +	spin_lock_bh(&sock_tag_list_lock); +	prdebug_sock_tag_tree(indent_level, &sock_tag_tree); +	spin_unlock_bh(&sock_tag_list_lock); + +	spin_lock_bh(&sock_tag_list_lock); +	spin_lock_bh(&uid_tag_data_tree_lock); +	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); +	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); +	spin_unlock_bh(&uid_tag_data_tree_lock); +	spin_unlock_bh(&sock_tag_list_lock); + +	spin_lock_bh(&iface_stat_list_lock); +	prdebug_iface_stat_list(indent_level, &iface_stat_list); +	spin_unlock_bh(&iface_stat_list_lock); + +	pr_debug("qtaguid: %s(): }\n", __func__); +} +#else +static void prdebug_full_state(int indent_level, const char *fmt, ...) 
{} +#endif + +/* + * Procfs reader to get all active socket tags using style "1)" as described in + * fs/proc/generic.c + */ +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, +				  off_t items_to_skip, int char_count, int *eof, +				  void *data) +{ +	char *outp = page; +	int len; +	uid_t uid; +	struct rb_node *node; +	struct sock_tag *sock_tag_entry; +	int item_index = 0; +	int indent_level = 0; +	long f_count; + +	if (unlikely(module_passive)) { +		*eof = 1; +		return 0; +	} + +	if (*eof) +		return 0; + +	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u " +		 "page=%p off=%ld char_count=%d *eof=%d\n", +		 current->pid, current->tgid, current_fsuid(), +		 page, items_to_skip, char_count, *eof); + +	spin_lock_bh(&sock_tag_list_lock); +	for (node = rb_first(&sock_tag_tree); +	     node; +	     node = rb_next(node)) { +		if (item_index++ < items_to_skip) +			continue; +		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); +		uid = get_uid_from_tag(sock_tag_entry->tag); +		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " +			 "pid=%u\n", +			 sock_tag_entry->sk, +			 sock_tag_entry->tag, +			 uid, +			 sock_tag_entry->pid +			); +		f_count = atomic_long_read( +			&sock_tag_entry->socket->file->f_count); +		len = snprintf(outp, char_count, +			       "sock=%p tag=0x%llx (uid=%u) pid=%u " +			       "f_count=%lu\n", +			       sock_tag_entry->sk, +			       sock_tag_entry->tag, uid, +			       sock_tag_entry->pid, f_count); +		if (len >= char_count) { +			spin_unlock_bh(&sock_tag_list_lock); +			*outp = '\0'; +			return outp - page; +		} +		outp += len; +		char_count -= len; +		(*num_items_returned)++; +	} +	spin_unlock_bh(&sock_tag_list_lock); + +	if (item_index++ >= items_to_skip) { +		len = snprintf(outp, char_count, +			       "events: sockets_tagged=%llu " +			       "sockets_untagged=%llu " +			       "counter_set_changes=%llu " +			       "delete_cmds=%llu " +			       "iface_events=%llu " +			       
"match_calls=%llu " +			       "match_calls_prepost=%llu " +			       "match_found_sk=%llu " +			       "match_found_sk_in_ct=%llu " +			       "match_found_no_sk_in_ct=%llu " +			       "match_no_sk=%llu " +			       "match_no_sk_file=%llu\n", +			       atomic64_read(&qtu_events.sockets_tagged), +			       atomic64_read(&qtu_events.sockets_untagged), +			       atomic64_read(&qtu_events.counter_set_changes), +			       atomic64_read(&qtu_events.delete_cmds), +			       atomic64_read(&qtu_events.iface_events), +			       atomic64_read(&qtu_events.match_calls), +			       atomic64_read(&qtu_events.match_calls_prepost), +			       atomic64_read(&qtu_events.match_found_sk), +			       atomic64_read(&qtu_events.match_found_sk_in_ct), +			       atomic64_read( +				       &qtu_events.match_found_no_sk_in_ct), +			       atomic64_read(&qtu_events.match_no_sk), +			       atomic64_read(&qtu_events.match_no_sk_file)); +		if (len >= char_count) { +			*outp = '\0'; +			return outp - page; +		} +		outp += len; +		char_count -= len; +		(*num_items_returned)++; +	} + +	/* Count the following as part of the last item_index */ +	if (item_index > items_to_skip) { +		prdebug_full_state(indent_level, "proc ctrl"); +	} + +	*eof = 1; +	return outp - page; +} + +/* + * Delete socket tags, and stat tags associated with a given + * accouting tag and uid. 
+ */ +static int ctrl_cmd_delete(const char *input) +{ +	char cmd; +	uid_t uid; +	uid_t entry_uid; +	tag_t acct_tag; +	tag_t tag; +	int res, argc; +	struct iface_stat *iface_entry; +	struct rb_node *node; +	struct sock_tag *st_entry; +	struct rb_root st_to_free_tree = RB_ROOT; +	struct tag_stat *ts_entry; +	struct tag_counter_set *tcs_entry; +	struct tag_ref *tr_entry; +	struct uid_tag_data *utd_entry; + +	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); +	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " +		 "user_tag=0x%llx uid=%u\n", input, argc, cmd, +		 acct_tag, uid); +	if (argc < 2) { +		res = -EINVAL; +		goto err; +	} +	if (!valid_atag(acct_tag)) { +		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); +		res = -EINVAL; +		goto err; +	} +	if (argc < 3) { +		uid = current_fsuid(); +	} else if (!can_impersonate_uid(uid)) { +		pr_info("qtaguid: ctrl_delete(%s): " +			"insufficient priv from pid=%u tgid=%u uid=%u\n", +			input, current->pid, current->tgid, current_fsuid()); +		res = -EPERM; +		goto err; +	} + +	tag = combine_atag_with_uid(acct_tag, uid); +	CT_DEBUG("qtaguid: ctrl_delete(%s): " +		 "looking for tag=0x%llx (uid=%u)\n", +		 input, tag, uid); + +	/* Delete socket tags */ +	spin_lock_bh(&sock_tag_list_lock); +	node = rb_first(&sock_tag_tree); +	while (node) { +		st_entry = rb_entry(node, struct sock_tag, sock_node); +		entry_uid = get_uid_from_tag(st_entry->tag); +		node = rb_next(node); +		if (entry_uid != uid) +			continue; + +		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", +			 input, st_entry->tag, entry_uid); + +		if (!acct_tag || st_entry->tag == tag) { +			rb_erase(&st_entry->sock_node, &sock_tag_tree); +			/* Can't sockfd_put() within spinlock, do it later. */ +			sock_tag_tree_insert(st_entry, &st_to_free_tree); +			tr_entry = lookup_tag_ref(st_entry->tag, NULL); +			BUG_ON(tr_entry->num_sock_tags <= 0); +			tr_entry->num_sock_tags--; +			/* +			 * TODO: remove if, and start failing. 
+			 * This is a hack to work around the fact that in some +			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))" +			 * and are trying to work around apps +			 * that didn't open the /dev/xt_qtaguid. +			 */ +			if (st_entry->list.next && st_entry->list.prev) +				list_del(&st_entry->list); +		} +	} +	spin_unlock_bh(&sock_tag_list_lock); + +	sock_tag_tree_erase(&st_to_free_tree); + +	/* Delete tag counter-sets */ +	spin_lock_bh(&tag_counter_set_list_lock); +	/* Counter sets are only on the uid tag, not full tag */ +	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (tcs_entry) { +		CT_DEBUG("qtaguid: ctrl_delete(%s): " +			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n", +			 input, +			 tcs_entry->tn.tag, +			 get_uid_from_tag(tcs_entry->tn.tag), +			 tcs_entry->active_set); +		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); +		kfree(tcs_entry); +	} +	spin_unlock_bh(&tag_counter_set_list_lock); + +	/* +	 * If acct_tag is 0, then all entries belonging to uid are +	 * erased. 
+	 */ +	spin_lock_bh(&iface_stat_list_lock); +	list_for_each_entry(iface_entry, &iface_stat_list, list) { +		spin_lock_bh(&iface_entry->tag_stat_list_lock); +		node = rb_first(&iface_entry->tag_stat_tree); +		while (node) { +			ts_entry = rb_entry(node, struct tag_stat, tn.node); +			entry_uid = get_uid_from_tag(ts_entry->tn.tag); +			node = rb_next(node); + +			CT_DEBUG("qtaguid: ctrl_delete(%s): " +				 "ts tag=0x%llx (uid=%u)\n", +				 input, ts_entry->tn.tag, entry_uid); + +			if (entry_uid != uid) +				continue; +			if (!acct_tag || ts_entry->tn.tag == tag) { +				CT_DEBUG("qtaguid: ctrl_delete(%s): " +					 "erase ts: %s 0x%llx %u\n", +					 input, iface_entry->ifname, +					 get_atag_from_tag(ts_entry->tn.tag), +					 entry_uid); +				rb_erase(&ts_entry->tn.node, +					 &iface_entry->tag_stat_tree); +				kfree(ts_entry); +			} +		} +		spin_unlock_bh(&iface_entry->tag_stat_list_lock); +	} +	spin_unlock_bh(&iface_stat_list_lock); + +	/* Cleanup the uid_tag_data */ +	spin_lock_bh(&uid_tag_data_tree_lock); +	node = rb_first(&uid_tag_data_tree); +	while (node) { +		utd_entry = rb_entry(node, struct uid_tag_data, node); +		entry_uid = utd_entry->uid; +		node = rb_next(node); + +		CT_DEBUG("qtaguid: ctrl_delete(%s): " +			 "utd uid=%u\n", +			 input, entry_uid); + +		if (entry_uid != uid) +			continue; +		/* +		 * Go over the tag_refs, and those that don't have +		 * sock_tags using them are freed. 
+		 */ +		put_tag_ref_tree(tag, utd_entry); +		put_utd_entry(utd_entry); +	} +	spin_unlock_bh(&uid_tag_data_tree_lock); + +	atomic64_inc(&qtu_events.delete_cmds); +	res = 0; + +err: +	return res; +} + +static int ctrl_cmd_counter_set(const char *input) +{ +	char cmd; +	uid_t uid = 0; +	tag_t tag; +	int res, argc; +	struct tag_counter_set *tcs; +	int counter_set; + +	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); +	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " +		 "set=%d uid=%u\n", input, argc, cmd, +		 counter_set, uid); +	if (argc != 3) { +		res = -EINVAL; +		goto err; +	} +	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { +		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", +			input); +		res = -EINVAL; +		goto err; +	} +	if (!can_manipulate_uids()) { +		pr_info("qtaguid: ctrl_counterset(%s): " +			"insufficient priv from pid=%u tgid=%u uid=%u\n", +			input, current->pid, current->tgid, current_fsuid()); +		res = -EPERM; +		goto err; +	} + +	tag = make_tag_from_uid(uid); +	spin_lock_bh(&tag_counter_set_list_lock); +	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (!tcs) { +		tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); +		if (!tcs) { +			spin_unlock_bh(&tag_counter_set_list_lock); +			pr_err("qtaguid: ctrl_counterset(%s): " +			       "failed to alloc counter set\n", +			       input); +			res = -ENOMEM; +			goto err; +		} +		tcs->tn.tag = tag; +		tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); +		CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " +			 "(uid=%u) set=%d\n", +			 input, tag, get_uid_from_tag(tag), counter_set); +	} +	tcs->active_set = counter_set; +	spin_unlock_bh(&tag_counter_set_list_lock); +	atomic64_inc(&qtu_events.counter_set_changes); +	res = 0; + +err: +	return res; +} + +static int ctrl_cmd_tag(const char *input) +{ +	char cmd; +	int sock_fd = 0; +	uid_t uid = 0; +	tag_t acct_tag = make_atag_from_value(0); +	tag_t full_tag; +	struct socket 
*el_socket; +	int res, argc; +	struct sock_tag *sock_tag_entry; +	struct tag_ref *tag_ref_entry; +	struct uid_tag_data *uid_tag_data_entry; +	struct proc_qtu_data *pqd_entry; + +	/* Unassigned args will get defaulted later. */ +	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); +	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " +		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, +		 acct_tag, uid); +	if (argc < 2) { +		res = -EINVAL; +		goto err; +	} +	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */ +	if (!el_socket) { +		pr_info("qtaguid: ctrl_tag(%s): failed to lookup" +			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", +			input, sock_fd, res, current->pid, current->tgid, +			current_fsuid()); +		goto err; +	} +	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", +		 input, atomic_long_read(&el_socket->file->f_count), +		 el_socket->sk); +	if (argc < 3) { +		acct_tag = make_atag_from_value(0); +	} else if (!valid_atag(acct_tag)) { +		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); +		res = -EINVAL; +		goto err_put; +	} +	CT_DEBUG("qtaguid: ctrl_tag(%s): " +		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " +		 "in_group=%d in_egroup=%d\n", +		 input, current->pid, current->tgid, current_uid(), +		 current_euid(), current_fsuid(), +		 in_group_p(proc_ctrl_write_gid), +		 in_egroup_p(proc_ctrl_write_gid)); +	if (argc < 4) { +		uid = current_fsuid(); +	} else if (!can_impersonate_uid(uid)) { +		pr_info("qtaguid: ctrl_tag(%s): " +			"insufficient priv from pid=%u tgid=%u uid=%u\n", +			input, current->pid, current->tgid, current_fsuid()); +		res = -EPERM; +		goto err_put; +	} +	full_tag = combine_atag_with_uid(acct_tag, uid); + +	spin_lock_bh(&sock_tag_list_lock); +	sock_tag_entry = get_sock_stat_nl(el_socket->sk); +	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); +	if (IS_ERR(tag_ref_entry)) { +		res = PTR_ERR(tag_ref_entry); +		spin_unlock_bh(&sock_tag_list_lock); +		goto 
err_put; +	} +	tag_ref_entry->num_sock_tags++; +	if (sock_tag_entry) { +		struct tag_ref *prev_tag_ref_entry; + +		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " +			 "st@%p ...->f_count=%ld\n", +			 input, el_socket->sk, sock_tag_entry, +			 atomic_long_read(&el_socket->file->f_count)); +		/* +		 * This is a re-tagging, so release the sock_fd that was +		 * locked at the time of the 1st tagging. +		 * There is still the ref from this call's sockfd_lookup() so +		 * it can be done within the spinlock. +		 */ +		sockfd_put(sock_tag_entry->socket); +		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, +						    &uid_tag_data_entry); +		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); +		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); +		prev_tag_ref_entry->num_sock_tags--; +		sock_tag_entry->tag = full_tag; +	} else { +		CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", +			 input, el_socket->sk); +		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), +					 GFP_ATOMIC); +		if (!sock_tag_entry) { +			pr_err("qtaguid: ctrl_tag(%s): " +			       "socket tag alloc failed\n", +			       input); +			spin_unlock_bh(&sock_tag_list_lock); +			res = -ENOMEM; +			goto err_tag_unref_put; +		} +		sock_tag_entry->sk = el_socket->sk; +		sock_tag_entry->socket = el_socket; +		sock_tag_entry->pid = current->tgid; +		sock_tag_entry->tag = combine_atag_with_uid(acct_tag, +							    uid); +		spin_lock_bh(&uid_tag_data_tree_lock); +		pqd_entry = proc_qtu_data_tree_search( +			&proc_qtu_data_tree, current->tgid); +		/* +		 * TODO: remove if, and start failing. +		 * At first, we want to catch user-space code that is not +		 * opening the /dev/xt_qtaguid. +		 */ +		if (IS_ERR_OR_NULL(pqd_entry)) +			pr_warn_once( +				"qtaguid: %s(): " +				"User space forgot to open /dev/xt_qtaguid? 
" +				"pid=%u tgid=%u uid=%u\n", __func__, +				current->pid, current->tgid, +				current_fsuid()); +		else +			list_add(&sock_tag_entry->list, +				 &pqd_entry->sock_tag_list); +		spin_unlock_bh(&uid_tag_data_tree_lock); + +		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); +		atomic64_inc(&qtu_events.sockets_tagged); +	} +	spin_unlock_bh(&sock_tag_list_lock); +	/* We keep the ref to the socket (file) until it is untagged */ +	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", +		 input, sock_tag_entry, +		 atomic_long_read(&el_socket->file->f_count)); +	return 0; + +err_tag_unref_put: +	BUG_ON(tag_ref_entry->num_sock_tags <= 0); +	tag_ref_entry->num_sock_tags--; +	free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); +err_put: +	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", +		 input, atomic_long_read(&el_socket->file->f_count) - 1); +	/* Release the sock_fd that was grabbed by sockfd_lookup(). */ +	sockfd_put(el_socket); +	return res; + +err: +	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); +	return res; +} + +static int ctrl_cmd_untag(const char *input) +{ +	char cmd; +	int sock_fd = 0; +	struct socket *el_socket; +	int res, argc; +	struct sock_tag *sock_tag_entry; +	struct tag_ref *tag_ref_entry; +	struct uid_tag_data *utd_entry; +	struct proc_qtu_data *pqd_entry; + +	argc = sscanf(input, "%c %d", &cmd, &sock_fd); +	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", +		 input, argc, cmd, sock_fd); +	if (argc < 2) { +		res = -EINVAL; +		goto err; +	} +	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */ +	if (!el_socket) { +		pr_info("qtaguid: ctrl_untag(%s): failed to lookup" +			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", +			input, sock_fd, res, current->pid, current->tgid, +			current_fsuid()); +		goto err; +	} +	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", +		 input, atomic_long_read(&el_socket->file->f_count), +		 el_socket->sk); +	
spin_lock_bh(&sock_tag_list_lock); +	sock_tag_entry = get_sock_stat_nl(el_socket->sk); +	if (!sock_tag_entry) { +		spin_unlock_bh(&sock_tag_list_lock); +		res = -EINVAL; +		goto err_put; +	} +	/* +	 * The socket already belongs to the current process +	 * so it can do whatever it wants to it. +	 */ +	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); + +	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); +	BUG_ON(!tag_ref_entry); +	BUG_ON(tag_ref_entry->num_sock_tags <= 0); +	spin_lock_bh(&uid_tag_data_tree_lock); +	pqd_entry = proc_qtu_data_tree_search( +		&proc_qtu_data_tree, current->tgid); +	/* +	 * TODO: remove if, and start failing. +	 * At first, we want to catch user-space code that is not +	 * opening the /dev/xt_qtaguid. +	 */ +	if (IS_ERR_OR_NULL(pqd_entry)) +		pr_warn_once("qtaguid: %s(): " +			     "User space forgot to open /dev/xt_qtaguid? " +			     "pid=%u tgid=%u uid=%u\n", __func__, +			     current->pid, current->tgid, current_fsuid()); +	else +		list_del(&sock_tag_entry->list); +	spin_unlock_bh(&uid_tag_data_tree_lock); +	/* +	 * We don't free tag_ref from the utd_entry here, +	 * only during a cmd_delete(). +	 */ +	tag_ref_entry->num_sock_tags--; +	spin_unlock_bh(&sock_tag_list_lock); +	/* +	 * Release the sock_fd that was grabbed at tag time, +	 * and once more for the sockfd_lookup() here. +	 */ +	sockfd_put(sock_tag_entry->socket); +	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", +		 input, sock_tag_entry, +		 atomic_long_read(&el_socket->file->f_count) - 1); +	sockfd_put(el_socket); + +	kfree(sock_tag_entry); +	atomic64_inc(&qtu_events.sockets_untagged); + +	return 0; + +err_put: +	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", +		 input, atomic_long_read(&el_socket->file->f_count) - 1); +	/* Release the sock_fd that was grabbed by sockfd_lookup(). 
*/ +	sockfd_put(el_socket); +	return res; + +err: +	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); +	return res; +} + +static int qtaguid_ctrl_parse(const char *input, int count) +{ +	char cmd; +	int res; + +	CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", +		 input, current->pid, current->tgid, current_fsuid()); + +	cmd = input[0]; +	/* Collect params for commands */ +	switch (cmd) { +	case 'd': +		res = ctrl_cmd_delete(input); +		break; + +	case 's': +		res = ctrl_cmd_counter_set(input); +		break; + +	case 't': +		res = ctrl_cmd_tag(input); +		break; + +	case 'u': +		res = ctrl_cmd_untag(input); +		break; + +	default: +		res = -EINVAL; +		goto err; +	} +	if (!res) +		res = count; +err: +	CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); +	return res; +} + +#define MAX_QTAGUID_CTRL_INPUT_LEN 255 +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, +			unsigned long count, void *data) +{ +	char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; + +	if (unlikely(module_passive)) +		return count; + +	if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) +		return -EINVAL; + +	if (copy_from_user(input_buf, buffer, count)) +		return -EFAULT; + +	input_buf[count] = '\0'; +	return qtaguid_ctrl_parse(input_buf, count); +} + +struct proc_print_info { +	char *outp; +	char **num_items_returned; +	struct iface_stat *iface_entry; +	struct tag_stat *ts_entry; +	int item_index; +	int items_to_skip; +	int char_count; +}; + +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) +{ +	int len; +	struct data_counters *cnts; + +	if (!ppi->item_index) { +		if (ppi->item_index++ < ppi->items_to_skip) +			return 0; +		len = snprintf(ppi->outp, ppi->char_count, +			       "idx iface acct_tag_hex uid_tag_int cnt_set " +			       "rx_bytes rx_packets " +			       "tx_bytes tx_packets " +			       "rx_tcp_bytes rx_tcp_packets " +			       "rx_udp_bytes rx_udp_packets " +			       "rx_other_bytes rx_other_packets " +			       "tx_tcp_bytes tx_tcp_packets " +			
       "tx_udp_bytes tx_udp_packets " +			       "tx_other_bytes tx_other_packets\n"); +	} else { +		tag_t tag = ppi->ts_entry->tn.tag; +		uid_t stat_uid = get_uid_from_tag(tag); + +		if (!can_read_other_uid_stats(stat_uid)) { +			CT_DEBUG("qtaguid: stats line: " +				 "%s 0x%llx %u: insufficient priv " +				 "from pid=%u tgid=%u uid=%u\n", +				 ppi->iface_entry->ifname, +				 get_atag_from_tag(tag), stat_uid, +				 current->pid, current->tgid, current_fsuid()); +			return 0; +		} +		if (ppi->item_index++ < ppi->items_to_skip) +			return 0; +		cnts = &ppi->ts_entry->counters; +		len = snprintf( +			ppi->outp, ppi->char_count, +			"%d %s 0x%llx %u %u " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu\n", +			ppi->item_index, +			ppi->iface_entry->ifname, +			get_atag_from_tag(tag), +			stat_uid, +			cnt_set, +			dc_sum_bytes(cnts, cnt_set, IFS_RX), +			dc_sum_packets(cnts, cnt_set, IFS_RX), +			dc_sum_bytes(cnts, cnt_set, IFS_TX), +			dc_sum_packets(cnts, cnt_set, IFS_TX), +			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, +			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, +			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); +	} +	return len; +} + +static bool pp_sets(struct proc_print_info *ppi) +{ +	int len; +	int counter_set; +	for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; +	     counter_set++) { +		len = pp_stats_line(ppi, counter_set); +		if (len >= ppi->char_count) { +			*ppi->outp = '\0'; +			return false; +		} +		if 
(len) { +			ppi->outp += len; +			ppi->char_count -= len; +			(*ppi->num_items_returned)++; +		} +	} +	return true; +} + +/* + * Procfs reader to get all tag stats using style "1)" as described in + * fs/proc/generic.c + * Groups all protocols tx/rx bytes. + */ +static int qtaguid_stats_proc_read(char *page, char **num_items_returned, +				off_t items_to_skip, int char_count, int *eof, +				void *data) +{ +	struct proc_print_info ppi; +	int len; + +	ppi.outp = page; +	ppi.item_index = 0; +	ppi.char_count = char_count; +	ppi.num_items_returned = num_items_returned; +	ppi.items_to_skip = items_to_skip; + +	if (unlikely(module_passive)) { +		len = pp_stats_line(&ppi, 0); +		/* The header should always be shorter than the buffer. */ +		BUG_ON(len >= ppi.char_count); +		(*num_items_returned)++; +		*eof = 1; +		return len; +	} + +	CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u " +		 "page=%p *num_items_returned=%p off=%ld " +		 "char_count=%d *eof=%d\n", +		 current->pid, current->tgid, current_fsuid(), +		 page, *num_items_returned, +		 items_to_skip, char_count, *eof); + +	if (*eof) +		return 0; + +	/* The idx is there to help debug when things go belly up. 
*/ +	len = pp_stats_line(&ppi, 0); +	/* Don't advance the outp unless the whole line was printed */ +	if (len >= ppi.char_count) { +		*ppi.outp = '\0'; +		return ppi.outp - page; +	} +	if (len) { +		ppi.outp += len; +		ppi.char_count -= len; +		(*num_items_returned)++; +	} + +	spin_lock_bh(&iface_stat_list_lock); +	list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { +		struct rb_node *node; +		spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); +		for (node = rb_first(&ppi.iface_entry->tag_stat_tree); +		     node; +		     node = rb_next(node)) { +			ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); +			if (!pp_sets(&ppi)) { +				spin_unlock_bh( +					&ppi.iface_entry->tag_stat_list_lock); +				spin_unlock_bh(&iface_stat_list_lock); +				return ppi.outp - page; +			} +		} +		spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); +	} +	spin_unlock_bh(&iface_stat_list_lock); + +	*eof = 1; +	return ppi.outp - page; +} + +/*------------------------------------------*/ +static int qtudev_open(struct inode *inode, struct file *file) +{ +	struct uid_tag_data *utd_entry; +	struct proc_qtu_data  *pqd_entry; +	struct proc_qtu_data  *new_pqd_entry; +	int res; +	bool utd_entry_found; + +	if (unlikely(qtu_proc_handling_passive)) +		return 0; + +	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", +		 current->pid, current->tgid, current_fsuid()); + +	spin_lock_bh(&uid_tag_data_tree_lock); + +	/* Look for existing uid data, or alloc one. 
*/ +	utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); +	if (IS_ERR_OR_NULL(utd_entry)) { +		res = PTR_ERR(utd_entry); +		goto err; +	} + +	/* Look for existing PID based proc_data */ +	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, +					      current->tgid); +	if (pqd_entry) { +		pr_err("qtaguid: qtudev_open(): %u/%u %u " +		       "%s already opened\n", +		       current->pid, current->tgid, current_fsuid(), +		       QTU_DEV_NAME); +		res = -EBUSY; +		goto err_unlock_free_utd; +	} + +	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); +	if (!new_pqd_entry) { +		pr_err("qtaguid: qtudev_open(): %u/%u %u: " +		       "proc data alloc failed\n", +		       current->pid, current->tgid, current_fsuid()); +		res = -ENOMEM; +		goto err_unlock_free_utd; +	} +	new_pqd_entry->pid = current->tgid; +	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); +	new_pqd_entry->parent_tag_data = utd_entry; +	utd_entry->num_pqd++; + +	proc_qtu_data_tree_insert(new_pqd_entry, +				  &proc_qtu_data_tree); + +	spin_unlock_bh(&uid_tag_data_tree_lock); +	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", +		 current_fsuid(), new_pqd_entry); +	file->private_data = new_pqd_entry; +	return 0; + +err_unlock_free_utd: +	if (!utd_entry_found) { +		rb_erase(&utd_entry->node, &uid_tag_data_tree); +		kfree(utd_entry); +	} +	spin_unlock_bh(&uid_tag_data_tree_lock); +err: +	return res; +} + +static int qtudev_release(struct inode *inode, struct file *file) +{ +	struct proc_qtu_data  *pqd_entry = file->private_data; +	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data; +	struct sock_tag *st_entry; +	struct rb_root st_to_free_tree = RB_ROOT; +	struct list_head *entry, *next; +	struct tag_ref *tr; + +	if (unlikely(qtu_proc_handling_passive)) +		return 0; + +	/* +	 * Do not trust the current->pid, it might just be a kworker cleaning +	 * up after a dead proc. 
+	 */ +	DR_DEBUG("qtaguid: qtudev_release(): " +		 "pid=%u tgid=%u uid=%u " +		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", +		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid, +		 pqd_entry, pqd_entry->pid, utd_entry, +		 utd_entry->num_active_tags); + +	spin_lock_bh(&sock_tag_list_lock); +	spin_lock_bh(&uid_tag_data_tree_lock); + +	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { +		st_entry = list_entry(entry, struct sock_tag, list); +		DR_DEBUG("qtaguid: %s(): " +			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", +			 __func__, +			 st_entry, st_entry->sk, +			 current->pid, current->tgid, +			 pqd_entry->parent_tag_data->uid); + +		utd_entry = uid_tag_data_tree_search( +			&uid_tag_data_tree, +			get_uid_from_tag(st_entry->tag)); +		BUG_ON(IS_ERR_OR_NULL(utd_entry)); +		DR_DEBUG("qtaguid: %s(): " +			 "looking for tag=0x%llx in utd_entry=%p\n", __func__, +			 st_entry->tag, utd_entry); +		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, +					 st_entry->tag); +		BUG_ON(!tr); +		BUG_ON(tr->num_sock_tags <= 0); +		tr->num_sock_tags--; +		free_tag_ref_from_utd_entry(tr, utd_entry); + +		rb_erase(&st_entry->sock_node, &sock_tag_tree); +		list_del(&st_entry->list); +		/* Can't sockfd_put() within spinlock, do it later. */ +		sock_tag_tree_insert(st_entry, &st_to_free_tree); + +		/* +		 * Try to free the utd_entry if no other proc_qtu_data is +		 * using it (num_pqd is 0) and it doesn't have active tags +		 * (num_active_tags is 0). 
+		 */ +		put_utd_entry(utd_entry); +	} + +	rb_erase(&pqd_entry->node, &proc_qtu_data_tree); +	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); +	pqd_entry->parent_tag_data->num_pqd--; +	put_utd_entry(pqd_entry->parent_tag_data); +	kfree(pqd_entry); +	file->private_data = NULL; + +	spin_unlock_bh(&uid_tag_data_tree_lock); +	spin_unlock_bh(&sock_tag_list_lock); + + +	sock_tag_tree_erase(&st_to_free_tree); + +	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, +			   current->pid, current->tgid); +	return 0; +} + +/*------------------------------------------*/ +static const struct file_operations qtudev_fops = { +	.owner = THIS_MODULE, +	.open = qtudev_open, +	.release = qtudev_release, +}; + +static struct miscdevice qtu_device = { +	.minor = MISC_DYNAMIC_MINOR, +	.name = QTU_DEV_NAME, +	.fops = &qtudev_fops, +	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ +}; + +/*------------------------------------------*/ +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) +{ +	int ret; +	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); +	if (!*res_procdir) { +		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); +		ret = -ENOMEM; +		goto no_dir; +	} + +	xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, +						*res_procdir); +	if (!xt_qtaguid_ctrl_file) { +		pr_err("qtaguid: failed to create xt_qtaguid/ctrl " +			" file\n"); +		ret = -ENOMEM; +		goto no_ctrl_entry; +	} +	xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; +	xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; + +	xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, +						*res_procdir); +	if (!xt_qtaguid_stats_file) { +		pr_err("qtaguid: failed to create xt_qtaguid/stats " +			"file\n"); +		ret = -ENOMEM; +		goto no_stats_entry; +	} +	xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; +	/* +	 * TODO: add support counter hacking +	 * xt_qtaguid_stats_file->write_proc = 
qtaguid_stats_proc_write; +	 */ +	return 0; + +no_stats_entry: +	remove_proc_entry("ctrl", *res_procdir); +no_ctrl_entry: +	remove_proc_entry("xt_qtaguid", NULL); +no_dir: +	return ret; +} + +static struct xt_match qtaguid_mt_reg __read_mostly = { +	/* +	 * This module masquerades as the "owner" module so that iptables +	 * tools can deal with it. +	 */ +	.name       = "owner", +	.revision   = 1, +	.family     = NFPROTO_UNSPEC, +	.match      = qtaguid_mt, +	.matchsize  = sizeof(struct xt_qtaguid_match_info), +	.me         = THIS_MODULE, +}; + +static int __init qtaguid_mt_init(void) +{ +	if (qtaguid_proc_register(&xt_qtaguid_procdir) +	    || iface_stat_init(xt_qtaguid_procdir) +	    || xt_register_match(&qtaguid_mt_reg) +	    || misc_register(&qtu_device)) +		return -1; +	return 0; +} + +/* + * TODO: allow unloading of the module. + * For now stats are permanent. + * Kconfig forces'y/n' and never an 'm'. + */ + +module_init(qtaguid_mt_init); +MODULE_AUTHOR("jpa <jpa@google.com>"); +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_owner"); +MODULE_ALIAS("ip6t_owner"); +MODULE_ALIAS("ipt_qtaguid"); +MODULE_ALIAS("ip6t_qtaguid"); diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 00000000000..d79f8383abf --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h @@ -0,0 +1,333 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __XT_QTAGUID_INTERNAL_H__ +#define __XT_QTAGUID_INTERNAL_H__ + +#include <linux/types.h> +#include <linux/rbtree.h> +#include <linux/spinlock_types.h> +#include <linux/workqueue.h> + +/* Iface handling */ +#define IDEBUG_MASK (1<<0) +/* Iptable Matching. Per packet. */ +#define MDEBUG_MASK (1<<1) +/* Red-black tree handling. Per packet. */ +#define RDEBUG_MASK (1<<2) +/* procfs ctrl/stats handling */ +#define CDEBUG_MASK (1<<3) +/* dev and resource tracking */ +#define DDEBUG_MASK (1<<4) + +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ +#define DEFAULT_DEBUG_MASK 0 + +/* + * (Un)Define these *DEBUG to compile out/in the pr_debug calls. + * All undef: text size ~ 0x3030; all def: ~ 0x4404. + */ +#define IDEBUG +#define MDEBUG +#define RDEBUG +#define CDEBUG +#define DDEBUG + +#define MSK_DEBUG(mask, ...) do {                           \ +		if (unlikely(qtaguid_debug_mask & (mask)))  \ +			pr_debug(__VA_ARGS__);              \ +	} while (0) +#ifdef IDEBUG +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) +#else +#define IF_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef MDEBUG +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) +#else +#define MT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef RDEBUG +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) +#else +#define RB_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef CDEBUG +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) +#else +#define CT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef DDEBUG +#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) +#else +#define DR_DEBUG(...) no_printk(__VA_ARGS__) +#endif + +extern uint qtaguid_debug_mask; + +/*---------------------------------------------------------------------------*/ +/* + * Tags: + * + * They represent what the data usage counters will be tracked against. + * By default a tag is just based on the UID. + * The UID is used as the base for policing, and can not be ignored. 
+ * So a tag will always at least represent a UID (uid_tag). + * + * A tag can be augmented with an "accounting tag" which is associated + * with a UID. + * User space can set the acct_tag portion of the tag which is then used + * with sockets: all data belonging to that socket will be counted against the + * tag. The policing is then based on the tag's uid_tag portion, + * and stats are collected for the acct_tag portion separately. + * + * There could be + * a:  {acct_tag=1, uid_tag=10003} + * b:  {acct_tag=2, uid_tag=10003} + * c:  {acct_tag=3, uid_tag=10003} + * d:  {acct_tag=0, uid_tag=10003} + * a, b, and c represent tags associated with specific sockets. + * d is for the totals for that uid, including all untagged traffic. + * Typically d is used with policing/quota rules. + * + * We want tag_t big enough to distinguish uid_t and acct_tag. + * It might become a struct if needed. + * Nothing should be using it as an int. + */ +typedef uint64_t tag_t;  /* Only used via accessors */ + +#define TAG_UID_MASK 0xFFFFFFFFULL +#define TAG_ACCT_MASK (~0xFFFFFFFFULL) + +static inline int tag_compare(tag_t t1, tag_t t2) +{ +	return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; +} + +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) +{ +	return acct_tag | uid; +} +static inline tag_t make_tag_from_uid(uid_t uid) +{ +	return uid; +} +static inline uid_t get_uid_from_tag(tag_t tag) +{ +	return tag & TAG_UID_MASK; +} +static inline tag_t get_utag_from_tag(tag_t tag) +{ +	return tag & TAG_UID_MASK; +} +static inline tag_t get_atag_from_tag(tag_t tag) +{ +	return tag & TAG_ACCT_MASK; +} + +static inline bool valid_atag(tag_t tag) +{ +	return !(tag & TAG_UID_MASK); +} +static inline tag_t make_atag_from_value(uint32_t value) +{ +	return (uint64_t)value << 32; +} +/*---------------------------------------------------------------------------*/ + +/* + * Maximum number of socket tags that a UID is allowed to have active. 
+ * Multiple processes belonging to the same UID contribute towards this limit. + * Special UIDs that can impersonate a UID also contribute (e.g. download + * manager, ...) + */ +#define DEFAULT_MAX_SOCK_TAGS 1024 + +/* + * For now we only track 2 sets of counters. + * The default set is 0. + * Userspace can activate another set for a given uid being tracked. + */ +#define IFS_MAX_COUNTER_SETS 2 + +enum ifs_tx_rx { +	IFS_TX, +	IFS_RX, +	IFS_MAX_DIRECTIONS +}; + +/* For now, TCP, UDP, the rest */ +enum ifs_proto { +	IFS_TCP, +	IFS_UDP, +	IFS_PROTO_OTHER, +	IFS_MAX_PROTOS +}; + +struct byte_packet_counters { +	uint64_t bytes; +	uint64_t packets; +}; + +struct data_counters { +	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; +}; + +/* Generic X based nodes used as a base for rb_tree ops */ +struct tag_node { +	struct rb_node node; +	tag_t tag; +}; + +struct tag_stat { +	struct tag_node tn; +	struct data_counters counters; +	/* +	 * If this tag is acct_tag based, we need to count against the +	 * matching parent uid_tag. +	 */ +	struct data_counters *parent_counters; +}; + +struct iface_stat { +	struct list_head list;  /* in iface_stat_list */ +	char *ifname; +	bool active; +	/* net_dev is only valid for active iface_stat */ +	struct net_device *net_dev; + +	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; +	struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; +	/* +	 * We keep the last_known, because some devices reset their counters +	 * just before NETDEV_UP, while some will reset just before +	 * NETDEV_REGISTER (which is more normal). +	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see +	 * its current dev stats smaller that what was previously known, we +	 * assume an UNREGISTER and just use the last_known. 
+	 */ +	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; +	/* last_known is usable when last_known_valid is true */ +	bool last_known_valid; + +	struct proc_dir_entry *proc_ptr; + +	struct rb_root tag_stat_tree; +	spinlock_t tag_stat_list_lock; +}; + +/* This is needed to create proc_dir_entries from atomic context. */ +struct iface_stat_work { +	struct work_struct iface_work; +	struct iface_stat *iface_entry; +}; + +/* + * Track tag that this socket is transferring data for, and not necessarily + * the uid that owns the socket. + * This is the tag against which tag_stat.counters will be billed. + * These structs need to be looked up by sock and pid. + */ +struct sock_tag { +	struct rb_node sock_node; +	struct sock *sk;  /* Only used as a number, never dereferenced */ +	/* The socket is needed for sockfd_put() */ +	struct socket *socket; +	/* Used to associate with a given pid */ +	struct list_head list;   /* in proc_qtu_data.sock_tag_list */ +	pid_t pid; + +	tag_t tag; +}; + +struct qtaguid_event_counts { +	/* Various successful events */ +	atomic64_t sockets_tagged; +	atomic64_t sockets_untagged; +	atomic64_t counter_set_changes; +	atomic64_t delete_cmds; +	atomic64_t iface_events;  /* Number of NETDEV_* events handled */ + +	atomic64_t match_calls;   /* Number of times iptables called mt */ +	/* Number of times iptables called mt from pre or post routing hooks */ +	atomic64_t match_calls_prepost; +	/* +	 * match_found_sk_*: numbers related to the netfilter matching +	 * function finding a sock for the sk_buff. +	 * Total skbs processed is sum(match_found*). +	 */ +	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */ +	/* The connection tracker had or didn't have the sk. */ +	atomic64_t match_found_sk_in_ct; +	atomic64_t match_found_no_sk_in_ct; +	/* +	 * No sk could be found. No apparent owner. Could happen with +	 * unsolicited traffic. +	 */ +	atomic64_t match_no_sk; +	/* +	 * The file ptr in the sk_socket wasn't there. 
+	 * This might happen for traffic while the socket is being closed. +	 */ +	atomic64_t match_no_sk_file; +}; + +/* Track the set active_set for the given tag. */ +struct tag_counter_set { +	struct tag_node tn; +	int active_set; +}; + +/*----------------------------------------------*/ +/* + * The qtu uid data is used to track resources that are created directly or + * indirectly by processes (uid tracked). + * It is shared by the processes with the same uid. + * Some of the resource will be counted to prevent further rogue allocations, + * some will need freeing once the owner process (uid) exits. + */ +struct uid_tag_data { +	struct rb_node node; +	uid_t uid; + +	/* +	 * For the uid, how many accounting tags have been set. +	 */ +	int num_active_tags; +	/* Track the number of proc_qtu_data that reference it */ +	int num_pqd; +	struct rb_root tag_ref_tree; +	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */ +}; + +struct tag_ref { +	struct tag_node tn; + +	/* +	 * This tracks the number of active sockets that have a tag on them +	 * which matches this tag_ref.tn.tag. +	 * A tag ref can live on after the sockets are untagged. +	 * A tag ref can only be removed during a tag delete command. +	 */ +	int num_sock_tags; +}; + +struct proc_qtu_data { +	struct rb_node node; +	pid_t pid; + +	struct uid_tag_data *parent_tag_data; + +	/* Tracks the sock_tags that need freeing upon this proc's death */ +	struct list_head sock_tag_list; +	/* No spinlock_t sock_tag_list_lock; use the global one. */ +}; + +/*----------------------------------------------*/ +#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */ diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 00000000000..8cbd8e42bcc --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c @@ -0,0 +1,564 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. 
+ * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Most of the functions in this file just waste time if DEBUG is not defined. + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed + * debug flags ore not defined. + * Those funcs that fail to allocate memory will panic as there is no need to + * hobble allong just pretending to do the requested work. + */ + +#define DEBUG + +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/net.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/spinlock_types.h> + + +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +#ifdef DDEBUG + +static void _bug_on_err_or_null(void *ptr) +{ +	if (IS_ERR_OR_NULL(ptr)) { +		pr_err("qtaguid: kmalloc failed\n"); +		BUG(); +	} +} + +char *pp_tag_t(tag_t *tag) +{ +	char *res; + +	if (!tag) +		res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); +	else +		res = kasprintf(GFP_ATOMIC, +				"tag_t@%p{tag=0x%llx, uid=%u}", +				tag, *tag, get_uid_from_tag(*tag)); +	_bug_on_err_or_null(res); +	return res; +} + +char *pp_data_counters(struct data_counters *dc, bool showValues) +{ +	char *res; + +	if (!dc) +		res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); +	else if (showValues) +		res = kasprintf( +			GFP_ATOMIC, "data_counters@%p{" +			"set0{" +			"rx{" +			"tcp{b=%llu, p=%llu}, " +			"udp{b=%llu, p=%llu}," +			"other{b=%llu, p=%llu}}, " +			"tx{" +			"tcp{b=%llu, p=%llu}, " +			"udp{b=%llu, p=%llu}," +			"other{b=%llu, p=%llu}}}, " +			"set1{" +			"rx{" +			"tcp{b=%llu, p=%llu}, " +			"udp{b=%llu, p=%llu}," +			"other{b=%llu, p=%llu}}, " +			"tx{" +			"tcp{b=%llu, p=%llu}, " +			"udp{b=%llu, p=%llu}," +			"other{b=%llu, p=%llu}}}}", +			dc, +			dc->bpc[0][IFS_RX][IFS_TCP].bytes, +			dc->bpc[0][IFS_RX][IFS_TCP].packets, +			
dc->bpc[0][IFS_RX][IFS_UDP].bytes, +			dc->bpc[0][IFS_RX][IFS_UDP].packets, +			dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, +			dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, +			dc->bpc[0][IFS_TX][IFS_TCP].bytes, +			dc->bpc[0][IFS_TX][IFS_TCP].packets, +			dc->bpc[0][IFS_TX][IFS_UDP].bytes, +			dc->bpc[0][IFS_TX][IFS_UDP].packets, +			dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, +			dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, +			dc->bpc[1][IFS_RX][IFS_TCP].bytes, +			dc->bpc[1][IFS_RX][IFS_TCP].packets, +			dc->bpc[1][IFS_RX][IFS_UDP].bytes, +			dc->bpc[1][IFS_RX][IFS_UDP].packets, +			dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, +			dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, +			dc->bpc[1][IFS_TX][IFS_TCP].bytes, +			dc->bpc[1][IFS_TX][IFS_TCP].packets, +			dc->bpc[1][IFS_TX][IFS_UDP].bytes, +			dc->bpc[1][IFS_TX][IFS_UDP].packets, +			dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, +			dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); +	else +		res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); +	_bug_on_err_or_null(res); +	return res; +} + +char *pp_tag_node(struct tag_node *tn) +{ +	char *tag_str; +	char *res; + +	if (!tn) { +		res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); +		_bug_on_err_or_null(res); +		return res; +	} +	tag_str = pp_tag_t(&tn->tag); +	res = kasprintf(GFP_ATOMIC, +			"tag_node@%p{tag=%s}", +			tn, tag_str); +	_bug_on_err_or_null(res); +	kfree(tag_str); +	return res; +} + +char *pp_tag_ref(struct tag_ref *tr) +{ +	char *tn_str; +	char *res; + +	if (!tr) { +		res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); +		_bug_on_err_or_null(res); +		return res; +	} +	tn_str = pp_tag_node(&tr->tn); +	res = kasprintf(GFP_ATOMIC, +			"tag_ref@%p{%s, num_sock_tags=%d}", +			tr, tn_str, tr->num_sock_tags); +	_bug_on_err_or_null(res); +	kfree(tn_str); +	return res; +} + +char *pp_tag_stat(struct tag_stat *ts) +{ +	char *tn_str; +	char *counters_str; +	char *parent_counters_str; +	char *res; + +	if (!ts) { +		res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); +		
_bug_on_err_or_null(res); +		return res; +	} +	tn_str = pp_tag_node(&ts->tn); +	counters_str = pp_data_counters(&ts->counters, true); +	parent_counters_str = pp_data_counters(ts->parent_counters, false); +	res = kasprintf(GFP_ATOMIC, +			"tag_stat@%p{%s, counters=%s, parent_counters=%s}", +			ts, tn_str, counters_str, parent_counters_str); +	_bug_on_err_or_null(res); +	kfree(tn_str); +	kfree(counters_str); +	kfree(parent_counters_str); +	return res; +} + +char *pp_iface_stat(struct iface_stat *is) +{ +	char *res; +	if (!is) +		res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); +	else +		res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" +				"list=list_head{...}, " +				"ifname=%s, " +				"total_dev={rx={bytes=%llu, " +				"packets=%llu}, " +				"tx={bytes=%llu, " +				"packets=%llu}}, " +				"total_skb={rx={bytes=%llu, " +				"packets=%llu}, " +				"tx={bytes=%llu, " +				"packets=%llu}}, " +				"last_known_valid=%d, " +				"last_known={rx={bytes=%llu, " +				"packets=%llu}, " +				"tx={bytes=%llu, " +				"packets=%llu}}, " +				"active=%d, " +				"net_dev=%p, " +				"proc_ptr=%p, " +				"tag_stat_tree=rb_root{...}}", +				is, +				is->ifname, +				is->totals_via_dev[IFS_RX].bytes, +				is->totals_via_dev[IFS_RX].packets, +				is->totals_via_dev[IFS_TX].bytes, +				is->totals_via_dev[IFS_TX].packets, +				is->totals_via_skb[IFS_RX].bytes, +				is->totals_via_skb[IFS_RX].packets, +				is->totals_via_skb[IFS_TX].bytes, +				is->totals_via_skb[IFS_TX].packets, +				is->last_known_valid, +				is->last_known[IFS_RX].bytes, +				is->last_known[IFS_RX].packets, +				is->last_known[IFS_TX].bytes, +				is->last_known[IFS_TX].packets, +				is->active, +				is->net_dev, +				is->proc_ptr); +	_bug_on_err_or_null(res); +	return res; +} + +char *pp_sock_tag(struct sock_tag *st) +{ +	char *tag_str; +	char *res; + +	if (!st) { +		res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); +		_bug_on_err_or_null(res); +		return res; +	} +	tag_str = pp_tag_t(&st->tag); +	res = kasprintf(GFP_ATOMIC, 
"sock_tag@%p{" +			"sock_node=rb_node{...}, " +			"sk=%p socket=%p (f_count=%lu), list=list_head{...}, " +			"pid=%u, tag=%s}", +			st, st->sk, st->socket, atomic_long_read( +				&st->socket->file->f_count), +			st->pid, tag_str); +	_bug_on_err_or_null(res); +	kfree(tag_str); +	return res; +} + +char *pp_uid_tag_data(struct uid_tag_data *utd) +{ +	char *res; + +	if (!utd) +		res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); +	else +		res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" +				"uid=%u, num_active_acct_tags=%d, " +				"num_pqd=%d, " +				"tag_node_tree=rb_root{...}, " +				"proc_qtu_data_tree=rb_root{...}}", +				utd, utd->uid, +				utd->num_active_tags, utd->num_pqd); +	_bug_on_err_or_null(res); +	return res; +} + +char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ +	char *parent_tag_data_str; +	char *res; + +	if (!pqd) { +		res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); +		_bug_on_err_or_null(res); +		return res; +	} +	parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); +	res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" +			"node=rb_node{...}, pid=%u, " +			"parent_tag_data=%s, " +			"sock_tag_list=list_head{...}}", +			pqd, pqd->pid, parent_tag_data_str +		); +	_bug_on_err_or_null(res); +	kfree(parent_tag_data_str); +	return res; +} + +/*------------------------------------------*/ +void prdebug_sock_tag_tree(int indent_level, +			   struct rb_root *sock_tag_tree) +{ +	struct rb_node *node; +	struct sock_tag *sock_tag_entry; +	char *str; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (RB_EMPTY_ROOT(sock_tag_tree)) { +		str = "sock_tag_tree=rb_root{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "sock_tag_tree=rb_root{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	for (node = rb_first(sock_tag_tree); +	     node; +	     node = rb_next(node)) { +		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); +		str = 
pp_sock_tag(sock_tag_entry); +		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); +		kfree(str); +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_sock_tag_list(int indent_level, +			   struct list_head *sock_tag_list) +{ +	struct sock_tag *sock_tag_entry; +	char *str; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (list_empty(sock_tag_list)) { +		str = "sock_tag_list=list_head{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "sock_tag_list=list_head{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	list_for_each_entry(sock_tag_entry, sock_tag_list, list) { +		str = pp_sock_tag(sock_tag_entry); +		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); +		kfree(str); +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_proc_qtu_data_tree(int indent_level, +				struct rb_root *proc_qtu_data_tree) +{ +	char *str; +	struct rb_node *node; +	struct proc_qtu_data *proc_qtu_data_entry; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { +		str = "proc_qtu_data_tree=rb_root{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "proc_qtu_data_tree=rb_root{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	for (node = rb_first(proc_qtu_data_tree); +	     node; +	     node = rb_next(node)) { +		proc_qtu_data_entry = rb_entry(node, +					       struct proc_qtu_data, +					       node); +		str = pp_proc_qtu_data(proc_qtu_data_entry); +		pr_debug("%*d: %s,\n", indent_level*2, indent_level, +			 str); +		kfree(str); +		indent_level++; +		prdebug_sock_tag_list(indent_level, +				      &proc_qtu_data_entry->sock_tag_list); +		indent_level--; + +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, 
indent_level, str); +} + +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ +	char *str; +	struct rb_node *node; +	struct tag_ref *tag_ref_entry; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (RB_EMPTY_ROOT(tag_ref_tree)) { +		str = "tag_ref_tree{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "tag_ref_tree{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	for (node = rb_first(tag_ref_tree); +	     node; +	     node = rb_next(node)) { +		tag_ref_entry = rb_entry(node, +					 struct tag_ref, +					 tn.node); +		str = pp_tag_ref(tag_ref_entry); +		pr_debug("%*d: %s,\n", indent_level*2, indent_level, +			 str); +		kfree(str); +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_uid_tag_data_tree(int indent_level, +			       struct rb_root *uid_tag_data_tree) +{ +	char *str; +	struct rb_node *node; +	struct uid_tag_data *uid_tag_data_entry; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (RB_EMPTY_ROOT(uid_tag_data_tree)) { +		str = "uid_tag_data_tree=rb_root{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "uid_tag_data_tree=rb_root{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	for (node = rb_first(uid_tag_data_tree); +	     node; +	     node = rb_next(node)) { +		uid_tag_data_entry = rb_entry(node, struct uid_tag_data, +					      node); +		str = pp_uid_tag_data(uid_tag_data_entry); +		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); +		kfree(str); +		if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { +			indent_level++; +			prdebug_tag_ref_tree(indent_level, +					     &uid_tag_data_entry->tag_ref_tree); +			indent_level--; +		} +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_stat_tree(int 
indent_level, +				  struct rb_root *tag_stat_tree) +{ +	char *str; +	struct rb_node *node; +	struct tag_stat *ts_entry; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (RB_EMPTY_ROOT(tag_stat_tree)) { +		str = "tag_stat_tree{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "tag_stat_tree{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	for (node = rb_first(tag_stat_tree); +	     node; +	     node = rb_next(node)) { +		ts_entry = rb_entry(node, struct tag_stat, tn.node); +		str = pp_tag_stat(ts_entry); +		pr_debug("%*d: %s\n", indent_level*2, indent_level, +			 str); +		kfree(str); +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_iface_stat_list(int indent_level, +			     struct list_head *iface_stat_list) +{ +	char *str; +	struct iface_stat *iface_entry; + +	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) +		return; + +	if (list_empty(iface_stat_list)) { +		str = "iface_stat_list=list_head{}"; +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		return; +	} + +	str = "iface_stat_list=list_head{"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +	indent_level++; +	list_for_each_entry(iface_entry, iface_stat_list, list) { +		str = pp_iface_stat(iface_entry); +		pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +		kfree(str); + +		spin_lock_bh(&iface_entry->tag_stat_list_lock); +		if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { +			indent_level++; +			prdebug_tag_stat_tree(indent_level, +					      &iface_entry->tag_stat_tree); +			indent_level--; +		} +		spin_unlock_bh(&iface_entry->tag_stat_list_lock); +	} +	indent_level--; +	str = "}"; +	pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +#endif  /* ifdef DDEBUG */ +/*------------------------------------------*/ +static const char * const netdev_event_strings[] = { +	"netdev_unknown", +	"NETDEV_UP", +	
"NETDEV_DOWN", +	"NETDEV_REBOOT", +	"NETDEV_CHANGE", +	"NETDEV_REGISTER", +	"NETDEV_UNREGISTER", +	"NETDEV_CHANGEMTU", +	"NETDEV_CHANGEADDR", +	"NETDEV_GOING_DOWN", +	"NETDEV_CHANGENAME", +	"NETDEV_FEAT_CHANGE", +	"NETDEV_BONDING_FAILOVER", +	"NETDEV_PRE_UP", +	"NETDEV_PRE_TYPE_CHANGE", +	"NETDEV_POST_TYPE_CHANGE", +	"NETDEV_POST_INIT", +	"NETDEV_UNREGISTER_BATCH", +	"NETDEV_RELEASE", +	"NETDEV_NOTIFY_PEERS", +	"NETDEV_JOIN", +}; + +const char *netdev_evt_str(int netdev_event) +{ +	if (netdev_event < 0 +	    || netdev_event >= ARRAY_SIZE(netdev_event_strings)) +		return "bad event num"; +	return netdev_event_strings[netdev_event]; +} diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 00000000000..b63871a0be5 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h @@ -0,0 +1,120 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __XT_QTAGUID_PRINT_H__ +#define __XT_QTAGUID_PRINT_H__ + +#include "xt_qtaguid_internal.h" + +#ifdef DDEBUG + +char *pp_tag_t(tag_t *tag); +char *pp_data_counters(struct data_counters *dc, bool showValues); +char *pp_tag_node(struct tag_node *tn); +char *pp_tag_ref(struct tag_ref *tr); +char *pp_tag_stat(struct tag_stat *ts); +char *pp_iface_stat(struct iface_stat *is); +char *pp_sock_tag(struct sock_tag *st); +char *pp_uid_tag_data(struct uid_tag_data *qtd); +char *pp_proc_qtu_data(struct proc_qtu_data *pqd); + +/*------------------------------------------*/ +void prdebug_sock_tag_list(int indent_level, +			   struct list_head *sock_tag_list); +void prdebug_sock_tag_tree(int indent_level, +			   struct rb_root *sock_tag_tree); +void prdebug_proc_qtu_data_tree(int indent_level, +				struct rb_root *proc_qtu_data_tree); +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); +void prdebug_uid_tag_data_tree(int indent_level, +			       struct rb_root *uid_tag_data_tree); +void prdebug_tag_stat_tree(int indent_level, +			   struct rb_root *tag_stat_tree); +void prdebug_iface_stat_list(int indent_level, +			     struct list_head *iface_stat_list); + +#else + +/*------------------------------------------*/ +static inline char *pp_tag_t(tag_t *tag) +{ +	return NULL; +} +static inline char *pp_data_counters(struct data_counters *dc, bool showValues) +{ +	return NULL; +} +static inline char *pp_tag_node(struct tag_node *tn) +{ +	return NULL; +} +static inline char *pp_tag_ref(struct tag_ref *tr) +{ +	return NULL; +} +static inline char *pp_tag_stat(struct tag_stat *ts) +{ +	return NULL; +} +static inline char *pp_iface_stat(struct iface_stat *is) +{ +	return NULL; +} +static inline char *pp_sock_tag(struct sock_tag *st) +{ +	return NULL; +} +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) +{ +	return NULL; +} +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ +	return NULL; +} + 
+/*------------------------------------------*/ +static inline +void prdebug_sock_tag_list(int indent_level, +			   struct list_head *sock_tag_list) +{ +} +static inline +void prdebug_sock_tag_tree(int indent_level, +			   struct rb_root *sock_tag_tree) +{ +} +static inline +void prdebug_proc_qtu_data_tree(int indent_level, +				struct rb_root *proc_qtu_data_tree) +{ +} +static inline +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ +} +static inline +void prdebug_uid_tag_data_tree(int indent_level, +			       struct rb_root *uid_tag_data_tree) +{ +} +static inline +void prdebug_tag_stat_tree(int indent_level, +			   struct rb_root *tag_stat_tree) +{ +} +static inline +void prdebug_iface_stat_list(int indent_level, +			     struct list_head *iface_stat_list) +{ +} +#endif +/*------------------------------------------*/ +const char *netdev_evt_str(int netdev_event); +#endif  /* ifndef __XT_QTAGUID_PRINT_H__ */ diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 00000000000..fb2ef46b2b8 --- /dev/null +++ b/net/netfilter/xt_quota2.c @@ -0,0 +1,382 @@ +/* + * xt_quota2 - enhanced xt_quota that can count upwards and in packets + * as a minimal accounting match. + * by Jan Engelhardt <jengelh@medozas.de>, 2008 + * + * Originally based on xt_quota.c: + * 	netfilter module to enforce network quotas + * 	Sam Johnston <samj@samj.net> + * + *	This program is free software; you can redistribute it and/or modify + *	it under the terms of the GNU General Public License; either + *	version 2 of the License, as published by the Free Software Foundation. 
+ */
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota2.h>
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#endif
+
+/**
+ * struct xt_quota_counter - one quota value, shared by all rules naming it
+ * @quota:	current counter value (bytes or packets, depending on rule flags)
+ * @lock:	lock to protect quota writers from each other
+ * @list:	link in counter_list (named counters only)
+ * @ref:	number of rules holding this counter (named counters only)
+ * @name:	counter name, also used as the procfs file name
+ * @procfs_entry: procfs file exposing @quota (named counters only)
+ *
+ * Anonymous counters are allocated only up to @list (see q2_new_counter()),
+ * so nothing from @list onwards may be touched for them.
+ */
+struct xt_quota_counter {
+	u_int64_t quota;
+	spinlock_t lock;
+	struct list_head list;
+	atomic_t ref;
+	char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
+	struct proc_dir_entry *procfs_entry;
+};
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* Harald's favorite number +1 :D From ipt_ULOG.C */
+/* unsigned so that the type matches the "uint" module parameter below */
+static unsigned int qlog_nl_event = 112;
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(event_num,
+		 "Event number for NETLINK_NFLOG message. 0 disables log. "
+		 "111 is what ipt_ULOG uses.");
+static struct sock *nflognl;
+#endif
+
+static LIST_HEAD(counter_list);
+static DEFINE_SPINLOCK(counter_list_lock);
+
+static struct proc_dir_entry *proc_xt_quota;
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
+static unsigned int quota_list_uid   = 0;
+static unsigned int quota_list_gid   = 0;
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
+module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
+module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
+
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/*
+ * Broadcast a ULOG-format message (header only, no packet payload) on
+ * netlink group 1 to report that a quota has just been exhausted.
+ * Does nothing when the event_num module parameter is 0.
+ */
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+	ulog_packet_msg_t *pm;
+	struct sk_buff *log_skb;
+	size_t size;
+	struct nlmsghdr *nlh;
+
+	if (!qlog_nl_event)
+		return;
+
+	size = NLMSG_SPACE(sizeof(*pm));
+	size = max(size, (size_t)NLMSG_GOODSIZE);
+	log_skb = alloc_skb(size, GFP_ATOMIC);
+	if (!log_skb) {
+		pr_err("xt_quota2: cannot alloc skb for logging\n");
+		return;
+	}
+
+	/* NLMSG_PUT() uses "goto nlmsg_failure" */
+	nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
+			sizeof(*pm));
+	pm = NLMSG_DATA(nlh);
+	/*
+	 * Only some fields are assigned below; clear the whole message so
+	 * the remaining ones are not sent to userspace uninitialised.
+	 */
+	memset(pm, 0, sizeof(*pm));
+	if (skb->tstamp.tv64 == 0)
+		__net_timestamp((struct sk_buff *)skb);
+	pm->data_len = 0;
+	pm->hook = hooknum;
+	if (prefix != NULL)
+		strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
+	else
+		*(pm->prefix) = '\0';
+	if (in)
+		strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+	else
+		pm->indev_name[0] = '\0';
+
+	if (out)
+		strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+	else
+		pm->outdev_name[0] = '\0';
+
+	NETLINK_CB(log_skb).dst_group = 1;
+	pr_debug("throwing 1 packets to netlink group 1\n");
+	netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
+	/* success: do not fall through into the failure path */
+	return;
+
+nlmsg_failure:  /* Used within NLMSG_PUT() */
+	pr_debug("xt_quota2: error during NLMSG_PUT\n");
+	/* the skb was never handed to netlink, so it is still ours to free */
+	kfree_skb(log_skb);
+}
+#else
+/* Logging compiled out: no-op stub so callers need no #ifdef. */
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+}
+#endif  /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
+
+/* procfs read handler: print the counter's current value as decimal. */
+static int quota_proc_read(char *page, char **start, off_t offset,
+                           int count, int *eof, void *data)
+{
+	struct xt_quota_counter *e = data;
+	int ret;
+
+	spin_lock_bh(&e->lock);
+	ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota);
+	spin_unlock_bh(&e->lock);
+	return ret;
+}
+
+/*
+ * procfs write handler: parse a number from userspace (base auto-detected)
+ * and store it as the new counter value.  Returns the number of bytes
+ * consumed, or -EFAULT if the user buffer cannot be read.
+ */
+static int quota_proc_write(struct file *file, const char __user *input,
+                            unsigned long size, void *data)
+{
+	struct xt_quota_counter *e = data;
+	char buf[sizeof("18446744073709551616")];
+
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size) != 0)
+		return -EFAULT;
+	buf[sizeof(buf)-1] = '\0';
+	/*
+	 * Terminate right after the copied bytes: for a short write the rest
+	 * of buf is uninitialised stack and must not be parsed as digits.
+	 */
+	if (size < sizeof(buf))
+		buf[size] = '\0';
+
+	spin_lock_bh(&e->lock);
+	e->quota = simple_strtoull(buf, NULL, 0);
+	spin_unlock_bh(&e->lock);
+	return size;
+}
+
+/*
+ * Allocate a counter initialised from @q.  For anonymous counters (@anon)
+ * only the leading part of the structure, up to but excluding ->list, is
+ * allocated.  Returns NULL on allocation failure.
+ */
+static struct xt_quota_counter *
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
+{
+	struct xt_quota_counter *e;
+	unsigned int size;
+
+	/* Do not need all the procfs things for anonymous counters. */
+	size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
+	e = kmalloc(size, GFP_KERNEL);
+	if (e == NULL)
+		return NULL;
+
+	e->quota = q->quota;
+	spin_lock_init(&e->lock);
+	if (!anon) {
+		INIT_LIST_HEAD(&e->list);
+		atomic_set(&e->ref, 1);
+		strlcpy(e->name, q->name, sizeof(e->name));
+	}
+	return e;
+}
+
+/**
+ * q2_get_counter - get ref to counter or create new
+ * @q:	match info carrying the counter name and initial quota
+ *
+ * An empty name yields a private anonymous counter.  Otherwise an existing
+ * counter of that name is reused (its refcount is bumped), or a new one is
+ * created together with its procfs file.  Returns NULL on failure.
+ */
+static struct xt_quota_counter *
+q2_get_counter(const struct xt_quota_mtinfo2 *q)
+{
+	struct proc_dir_entry *p;
+	struct xt_quota_counter *e = NULL;
+	struct xt_quota_counter *new_e;
+
+	if (*q->name == '\0')
+		return q2_new_counter(q, true);
+
+	/* No need to hold a lock while getting a new counter */
+	new_e = q2_new_counter(q, false);
+	if (new_e == NULL)
+		goto out;
+
+	spin_lock_bh(&counter_list_lock);
+	list_for_each_entry(e, &counter_list, list)
+		if (strcmp(e->name, q->name) == 0) {
+			atomic_inc(&e->ref);
+			spin_unlock_bh(&counter_list_lock);
+			kfree(new_e);
+			pr_debug("xt_quota2: old counter name=%s", e->name);
+			return e;
+		}
+	e = new_e;
+	pr_debug("xt_quota2: new_counter name=%s", e->name);
+	list_add_tail(&e->list, &counter_list);
+	/* The entry having a refcount of 1 is not directly destructible.
+	 * This func has not yet returned the new entry, thus iptables
+	 * has no references for destroying this entry.
+	 * For another rule to try to destroy it, it would 1st need for this
+	 * func* to be re-invoked, acquire a new ref for the same named quota.
+	 * Nobody will access the e->procfs_entry either.
+	 * So release the lock. */
+	spin_unlock_bh(&counter_list_lock);
+
+	/* create_proc_entry() is not spin_lock happy */
+	p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms,
+	                      proc_xt_quota);
+
+	if (IS_ERR_OR_NULL(p)) {
+		spin_lock_bh(&counter_list_lock);
+		list_del(&e->list);
+		spin_unlock_bh(&counter_list_lock);
+		goto out;
+	}
+	p->data         = e;
+	p->read_proc    = quota_proc_read;
+	p->write_proc   = quota_proc_write;
+	p->uid          = quota_list_uid;
+	p->gid          = quota_list_gid;
+	return e;
+
+ out:
+	/* e is NULL here when the allocation itself failed; kfree(NULL) is OK */
+	kfree(e);
+	return NULL;
+}
+
+/*
+ * checkentry hook: validate flags and counter name, then attach the
+ * (possibly shared) counter to the rule via q->master.
+ */
+static int quota_mt2_check(const struct xt_mtchk_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+
+	pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
+
+	if (q->flags & ~XT_QUOTA_MASK)
+		return -EINVAL;
+
+	q->name[sizeof(q->name)-1] = '\0';
+	/* the name becomes a procfs file name: no hidden files, no paths */
+	if (*q->name == '.' || strchr(q->name, '/') != NULL) {
+		printk(KERN_ERR "xt_quota.3: illegal name\n");
+		return -EINVAL;
+	}
+
+	q->master = q2_get_counter(q);
+	if (q->master == NULL) {
+		printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * destroy hook: drop the rule's reference; the last user of a named
+ * counter unlinks it, removes its procfs file and frees it.
+ */
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+
+	if (*q->name == '\0') {
+		kfree(e);
+		return;
+	}
+
+	spin_lock_bh(&counter_list_lock);
+	if (!atomic_dec_and_test(&e->ref)) {
+		spin_unlock_bh(&counter_list_lock);
+		return;
+	}
+
+	list_del(&e->list);
+	spin_unlock_bh(&counter_list_lock);
+	/*
+	 * remove_proc_entry() may sleep, so it must run after the spinlock
+	 * is dropped.  The counter is already off the list with no refs
+	 * left, so no new user can find it.
+	 */
+	remove_proc_entry(e->name, proc_xt_quota);
+	kfree(e);
+}
+
+/*
+ * match hook.  In grow mode the counter is incremented and the rule always
+ * matches.  Otherwise the packet's cost (1 in packet mode, skb->len in byte
+ * mode) is checked against the remaining quota: if it fits, the cost is
+ * deducted (unless NO_CHANGE) and the rule matches (inverted by INVERT);
+ * if not, the quota is zeroed and the transition is logged once.
+ */
+static bool
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+	/* cost of this packet in the unit the counter is kept in */
+	unsigned int charge = (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+	bool ret = q->flags & XT_QUOTA_INVERT;
+
+	spin_lock_bh(&e->lock);
+	if (q->flags & XT_QUOTA_GROW) {
+		/*
+		 * While no_change is pointless in "grow" mode, we will
+		 * implement it here simply to have a consistent behavior.
+		 */
+		if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
+			e->quota += charge;
+		}
+		ret = true;
+	} else {
+		/* compare against charge, not skb->len: packet-mode quotas
+		 * count packets and must not be measured against bytes */
+		if (e->quota >= charge) {
+			if (!(q->flags & XT_QUOTA_NO_CHANGE))
+				e->quota -= charge;
+			ret = !ret;
+		} else {
+			/* We are transitioning, log that fact. */
+			if (e->quota) {
+				quota2_log(par->hooknum,
+					   skb,
+					   par->in,
+					   par->out,
+					   q->name);
+			}
+			/* we do not allow even small packets from now on */
+			e->quota = 0;
+		}
+	}
+	spin_unlock_bh(&e->lock);
+	return ret;
+}
+
+/* The same match is registered once for IPv4 and once for IPv6. */
+static struct xt_match quota_mt2_reg[] __read_mostly = {
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV4,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV6,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+};
+
+/*
+ * Module init: create the logging netlink socket (if configured), the
+ * xt_quota procfs directory, and register the matches.  Everything set
+ * up so far is torn down again if a later step fails.
+ */
+static int __init quota_mt2_init(void)
+{
+	int ret;
+	pr_debug("xt_quota2: init()");
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+	nflognl = netlink_kernel_create(&init_net,
+					NETLINK_NFLOG, 1, NULL,
+					NULL, THIS_MODULE);
+	if (!nflognl)
+		return -ENOMEM;
+#endif
+
+	proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
+	if (proc_xt_quota == NULL) {
+		ret = -EACCES;
+		goto err_release_nl;
+	}
+
+	ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	if (ret < 0)
+		goto err_remove_proc;
+	pr_debug("xt_quota2: init() %d", ret);
+	return ret;
+
+err_remove_proc:
+	remove_proc_entry("xt_quota", init_net.proc_net);
+err_release_nl:
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+	/* do not leak the kernel netlink socket on a failed load */
+	netlink_kernel_release(nflognl);
+#endif
+	pr_debug("xt_quota2: init() %d", ret);
+	return ret;
+}
+
+/* Module exit: undo quota_mt2_init() in reverse order. */
+static void __exit quota_mt2_exit(void)
+{
+	xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	remove_proc_entry("xt_quota", init_net.proc_net);
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+	netlink_kernel_release(nflognl);
+#endif
+}
+
+module_init(quota_mt2_init); +module_exit(quota_mt2_exit); +MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); +MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_quota2"); +MODULE_ALIAS("ip6t_quota2"); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 72bb07f57f9..1e48fcf2920 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,7 +35,7 @@  #include <net/netfilter/nf_conntrack.h>  #endif -static void +void  xt_socket_put_sk(struct sock *sk)  {  	if (sk->sk_state == TCP_TIME_WAIT) @@ -43,6 +43,7 @@ xt_socket_put_sk(struct sock *sk)  	else  		sock_put(sk);  } +EXPORT_SYMBOL(xt_socket_put_sk);  static int  extract_icmp4_fields(const struct sk_buff *skb, @@ -101,9 +102,8 @@ extract_icmp4_fields(const struct sk_buff *skb,  	return 0;  } -static bool -socket_match(const struct sk_buff *skb, struct xt_action_param *par, -	     const struct xt_socket_mtinfo1 *info) +struct sock* +xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par)  {  	const struct iphdr *iph = ip_hdr(skb);  	struct udphdr _hdr, *hp = NULL; @@ -120,7 +120,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,  		hp = skb_header_pointer(skb, ip_hdrlen(skb),  					sizeof(_hdr), &_hdr);  		if (hp == NULL) -			return false; +			return NULL;  		protocol = iph->protocol;  		saddr = iph->saddr; @@ -131,9 +131,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,  	} else if (iph->protocol == IPPROTO_ICMP) {  		if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,  					&sport, &dport)) -			return false; +			return NULL;  	} else { -		return false; +		return NULL;  	}  #ifdef XT_SOCKET_HAVE_CONNTRACK @@ -157,6 +157,23 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,  	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,  				   saddr, daddr, sport, dport, par->in, 
NFT_LOOKUP_ANY); + +	pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", +		 protocol, &saddr, ntohs(sport), +		 &daddr, ntohs(dport), +		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); + +	return sk; +} +EXPORT_SYMBOL(xt_socket_get4_sk); + +static bool +socket_match(const struct sk_buff *skb, struct xt_action_param *par, +	     const struct xt_socket_mtinfo1 *info) +{ +	struct sock *sk; + +	sk = xt_socket_get4_sk(skb, par);  	if (sk != NULL) {  		bool wildcard;  		bool transparent = true; @@ -179,11 +196,6 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,  			sk = NULL;  	} -	pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", -		 protocol, &saddr, ntohs(sport), -		 &daddr, ntohs(dport), -		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); -  	return (sk != NULL);  } @@ -255,8 +267,8 @@ extract_icmp6_fields(const struct sk_buff *skb,  	return 0;  } -static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +struct sock* +xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par)  {  	struct ipv6hdr *iph = ipv6_hdr(skb);  	struct udphdr _hdr, *hp = NULL; @@ -264,7 +276,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)  	struct in6_addr *daddr, *saddr;  	__be16 dport, sport;  	int thoff, tproto; -	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;  	tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);  	if (tproto < 0) { @@ -276,7 +287,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)  		hp = skb_header_pointer(skb, thoff,  					sizeof(_hdr), &_hdr);  		if (hp == NULL) -			return false; +			return NULL;  		saddr = &iph->saddr;  		sport = hp->source; @@ -286,13 +297,30 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)  	} else if (tproto == IPPROTO_ICMPV6) {  		if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,  					 &sport, &dport)) -			return false; +			return NULL;  
	} else { -		return false; +		return NULL;  	}  	sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,  				   saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); +	pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " +		 "(orig %pI6:%hu) sock %p\n", +		 tproto, saddr, ntohs(sport), +		 daddr, ntohs(dport), +		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); +	return sk; +} +EXPORT_SYMBOL(xt_socket_get6_sk); + +static bool +socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ +	struct sock *sk; +	const struct xt_socket_mtinfo1 *info; + +	info = (struct xt_socket_mtinfo1 *) par->matchinfo; +	sk = xt_socket_get6_sk(skb, par);  	if (sk != NULL) {  		bool wildcard;  		bool transparent = true; @@ -315,12 +343,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)  			sk = NULL;  	} -	pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " -		 "(orig %pI6:%hu) sock %p\n", -		 tproto, saddr, ntohs(sport), -		 daddr, ntohs(dport), -		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); -  	return (sk != NULL);  }  #endif diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 78efe895b66..8e12c8a2b82 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -10,6 +10,11 @@ menuconfig RFKILL  	  To compile this driver as a module, choose M here: the  	  module will be called rfkill. 
+config RFKILL_PM +	bool "Power off on suspend" +	depends on RFKILL && PM +	default y +  # LED trigger support  config RFKILL_LEDS  	bool diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 659b1c5cbcd..85cf8866a9b 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -770,6 +770,7 @@ void rfkill_pause_polling(struct rfkill *rfkill)  }  EXPORT_SYMBOL(rfkill_pause_polling); +#ifdef CONFIG_RFKILL_PM  void rfkill_resume_polling(struct rfkill *rfkill)  {  	BUG_ON(!rfkill); @@ -804,14 +805,17 @@ static int rfkill_resume(struct device *dev)  	return 0;  } +#endif  static struct class rfkill_class = {  	.name		= "rfkill",  	.dev_release	= rfkill_release,  	.dev_attrs	= rfkill_dev_attrs,  	.dev_uevent	= rfkill_dev_uevent, +#ifdef CONFIG_RFKILL_PM  	.suspend	= rfkill_suspend,  	.resume		= rfkill_resume, +#endif  };  bool rfkill_blocked(struct rfkill *rfkill) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 2e4444fedbe..8db24da2e2c 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -160,3 +160,14 @@ config LIB80211_DEBUG  	  from lib80211.  	  If unsure, say N. + +config CFG80211_ALLOW_RECONNECT +	bool "Allow reconnect while already connected" +	depends on CFG80211 +	default n +	help +	  cfg80211 stack doesn't allow to connect if you are already +	  connected. This option allows to make a connection in this case. + +	  Select this option ONLY for wlan drivers that are specifically +	  built for such purposes. 
diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 70faadf16a3..e40104f562b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -18,7 +18,7 @@  #include "nl80211.h"  #include "wext-compat.h" -#define IEEE80211_SCAN_RESULT_EXPIRE	(15 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE	(3 * HZ)  void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak)  { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f7e937ff897..bbbed736e5e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -689,8 +689,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,  		    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))  		return; +#ifndef CONFIG_CFG80211_ALLOW_RECONNECT  	if (wdev->sme_state != CFG80211_SME_CONNECTED)  		return; +#endif  	if (wdev->current_bss) {  		cfg80211_unhold_bss(wdev->current_bss); @@ -767,10 +769,14 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,  	ASSERT_WDEV_LOCK(wdev); +#ifndef CONFIG_CFG80211_ALLOW_RECONNECT  	if (wdev->sme_state != CFG80211_SME_IDLE)  		return -EALREADY;  	if (WARN_ON(wdev->connect_keys)) { +#else +	if (wdev->connect_keys) { +#endif  		kfree(wdev->connect_keys);  		wdev->connect_keys = NULL;  	} | 
