diff options
| author | Benn Pörscke <benn.porscke@stericsson.com> | 2011-12-16 15:04:55 +0100 | 
|---|---|---|
| committer | Benn Pörscke <benn.porscke@stericsson.com> | 2011-12-16 15:04:55 +0100 | 
| commit | 93f379e6cfadfded0d262192ca69d1abc096d90e (patch) | |
| tree | 43f180e31ee26ee94f7d2dd559132c30c6476b4d /net | |
| parent | 77955e37bd395f789900b8e180991ad67cabd899 (diff) | |
Squashandroid-20111223
Change-Id: I2fcf46d1fc4b0cd4c61e5be3654c43b80db86015
Diffstat (limited to 'net')
| -rw-r--r-- | net/bluetooth/bnep/bnep.h | 1 | ||||
| -rw-r--r-- | net/bluetooth/bnep/core.c | 9 | ||||
| -rw-r--r-- | net/bluetooth/hci_conn.c | 6 | ||||
| -rw-r--r-- | net/bluetooth/hci_core.c | 6 | ||||
| -rw-r--r-- | net/bluetooth/hidp/core.c | 19 | ||||
| -rw-r--r-- | net/bluetooth/l2cap_core.c | 16 | ||||
| -rw-r--r-- | net/bluetooth/l2cap_sock.c | 2 | ||||
| -rw-r--r-- | net/netfilter/xt_qtaguid.c | 1345 | 
8 files changed, 959 insertions, 445 deletions
| diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 449508aeb01..0545fe0493f 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -155,6 +155,7 @@ struct bnep_session {  	unsigned int  role;  	unsigned long state;  	unsigned long flags; +	atomic_t      terminate;  	struct task_struct *task;  	struct ethhdr eh; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index e1c5c1b13d3..b7026f35a9b 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -522,7 +522,7 @@ static int bnep_session(void *arg)  	while (1) {  		set_current_state(TASK_INTERRUPTIBLE); -		if (kthread_should_stop()) +		if (atomic_read(&s->terminate))  			break;  		/* RX */  		while ((skb = skb_dequeue(&sk->sk_receive_queue))) { @@ -677,9 +677,10 @@ int bnep_del_connection(struct bnep_conndel_req *req)  	down_read(&bnep_session_sem);  	s = __bnep_get_session(req->dst); -	if (s) -		kthread_stop(s->task); -	else +	if (s) { +		atomic_inc(&s->terminate); +		wake_up_process(s->task); +	} else  		err = -ENOENT;  	up_read(&bnep_session_sem); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 40248744a86..aecb1e7fdfa 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -631,6 +631,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)  	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {  		struct hci_cp_auth_requested cp; + +		/* encrypt must be pending if auth is also pending */ +		set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); +  		cp.handle = cpu_to_le16(conn->handle);  		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,  							sizeof(cp), &cp); @@ -696,7 +700,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)  		goto encrypt;  auth: -	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) +	if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))  		return 0;  	if (!hci_conn_auth(conn, sec_level, auth_type)) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 908fcd384ab..3b391986407 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg)  	BT_ERR("%s command tx timeout", hdev->name);  	atomic_set(&hdev->cmd_cnt, 1); -	clear_bit(HCI_RESET, &hdev->flags);  	tasklet_schedule(&hdev->cmd_task);  } @@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg)  		if (hdev->sent_cmd) {  			atomic_dec(&hdev->cmd_cnt);  			hci_send_frame(skb); -			mod_timer(&hdev->cmd_timer, +			if (test_bit(HCI_RESET, &hdev->flags)) +				del_timer(&hdev->cmd_timer); +			else +				mod_timer(&hdev->cmd_timer,  				  jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));  		} else {  			skb_queue_head(&hdev->cmd_q, skb); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 43b4c2deb7c..fb68f344c34 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -764,6 +764,7 @@ static int hidp_session(void *arg)  	up_write(&hidp_session_sem); +	kfree(session->rd_data);  	kfree(session);  	return 0;  } @@ -841,7 +842,8 @@ static int hidp_setup_input(struct hidp_session *session,  	err = input_register_device(input);  	if (err < 0) { -		hci_conn_put_device(session->conn); +		input_free_device(input); +		session->input = NULL;  		return err;  	} @@ -1044,8 +1046,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,  	}  	err = hid_add_device(session->hid); -	if (err < 0) -		goto err_add_device; +	if (err < 0) { +		atomic_inc(&session->terminate); +		wake_up_process(session->task); +		up_write(&hidp_session_sem); +		return err; +	}  	if (session->input) {  		hidp_send_ctrl_message(session, @@ -1059,12 +1065,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,  	up_write(&hidp_session_sem);  	return 0; -err_add_device: -	hid_destroy_device(session->hid); -	session->hid = NULL; -	atomic_inc(&session->terminate); -	wake_up_process(session->task); -  unlink:  	hidp_del_timer(session); @@ -1090,7 +1090,6 @@ purge:  failed:  	up_write(&hidp_session_sem); -	input_free_device(session->input);  	kfree(session);  	return err;  } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dc23b89d81b..6ec11b06ca2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -251,7 +251,7 @@ static void l2cap_chan_timeout(unsigned long arg)  	if (sock_owned_by_user(sk)) {  		/* sk is owned by user. Try again later */ -		__set_chan_timer(chan, HZ / 5); +		__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  		bh_unlock_sock(sk);  		chan_put(chan);  		return; @@ -2481,7 +2481,7 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd  		if (sock_owned_by_user(sk)) {  			l2cap_state_change(chan, BT_DISCONN);  			__clear_chan_timer(chan); -			__set_chan_timer(chan, HZ / 5); +			__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  			break;  		} @@ -2651,7 +2651,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr  	default:  		sk->sk_err = ECONNRESET; -		__set_chan_timer(chan, HZ * 5); +		__set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT);  		l2cap_send_disconn_req(conn, chan, ECONNRESET);  		goto done;  	} @@ -2708,7 +2708,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd  	if (sock_owned_by_user(sk)) {  		l2cap_state_change(chan, BT_DISCONN);  		__clear_chan_timer(chan); -		__set_chan_timer(chan, HZ / 5); +		__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  		bh_unlock_sock(sk);  		return 0;  	} @@ -2742,7 +2742,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd  	if (sock_owned_by_user(sk)) {  		l2cap_state_change(chan,BT_DISCONN);  		__clear_chan_timer(chan); -		__set_chan_timer(chan, HZ / 5); +		__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  		bh_unlock_sock(sk);  		return 0;  	} @@ -4071,7 +4071,7 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)  	if (encrypt == 0x00) {  		if (chan->sec_level == BT_SECURITY_MEDIUM) {  			__clear_chan_timer(chan); -			__set_chan_timer(chan, HZ * 5); +			__set_chan_timer(chan, L2CAP_ENC_TIMEOUT);  		} else if (chan->sec_level == BT_SECURITY_HIGH)  			l2cap_chan_close(chan, ECONNREFUSED);  	} else { @@ -4136,7 +4136,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)  					L2CAP_CONN_REQ, sizeof(req), &req);  			} else {  				__clear_chan_timer(chan); -				__set_chan_timer(chan, HZ / 10); +				__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  			}  		} else if (chan->state == BT_CONNECT2) {  			struct l2cap_conn_rsp rsp; @@ -4156,7 +4156,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)  				}  			} else {  				l2cap_state_change(chan, BT_DISCONN); -				__set_chan_timer(chan, HZ / 10); +				__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);  				res = L2CAP_CR_SEC_BLOCK;  				stat = L2CAP_CS_NO_INFO;  			} diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7d713b1c4cb..61f1f623091 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p  	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);  	sk->sk_destruct = l2cap_sock_destruct; -	sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); +	sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;  	sock_reset_flag(sk, SOCK_ZAPPED); diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 22552c9b81c..80b5990045b 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -8,8 +8,36 @@   * published by the Free Software Foundation.   */ -/* TODO: support ipv6 for iface_stat. - * Currently if an iface is only v6 it will not have stats collected. */ +/* #define DEBUG */ +/* #define IDEBUG */ +/* #define MDEBUG */ +/* #define RDEBUG */ +/* #define CDEBUG */ + +/* Iface handling */ +#ifdef IDEBUG +#define IF_DEBUG(...) pr_debug(__VA_ARGS__) +#else +#define IF_DEBUG(...) no_printk(__VA_ARGS__) +#endif +/* Iptable Matching */ +#ifdef MDEBUG +#define MT_DEBUG(...) pr_debug(__VA_ARGS__) +#else +#define MT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +/* Red-black tree handling */ +#ifdef RDEBUG +#define RB_DEBUG(...) pr_debug(__VA_ARGS__) +#else +#define RB_DEBUG(...) no_printk(__VA_ARGS__) +#endif +/* procfs ctrl/stats handling */ +#ifdef CDEBUG +#define CT_DEBUG(...) pr_debug(__VA_ARGS__) +#else +#define CT_DEBUG(...) no_printk(__VA_ARGS__) +#endif  #include <linux/file.h>  #include <linux/inetdevice.h> @@ -18,13 +46,16 @@  #include <linux/netfilter/xt_qtaguid.h>  #include <linux/skbuff.h>  #include <linux/workqueue.h> +#include <net/addrconf.h>  #include <net/sock.h>  #include <net/tcp.h>  #include <net/udp.h>  #include <linux/netfilter/xt_socket.h> -/* We only use the xt_socket funcs within a similar context to avoid unexpected - * return values. */ +/* + * We only use the xt_socket funcs within a similar context to avoid unexpected + * return values. + */  #define XT_SOCKET_SUPPORTED_HOOKS \  	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) @@ -61,7 +92,8 @@ module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,  module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,  		   S_IRUGO | S_IWUSR); -/* After the kernel has initiallized this module, it is still possible +/* + * After the kernel has initiallized this module, it is still possible   * to make it passive:   *  - do not register it via iptables.   *   the matching code will not be invoked. @@ -106,6 +138,14 @@ typedef uint64_t tag_t;  /* Only used via accessors */  static const char *iface_stat_procdirname = "iface_stat";  static struct proc_dir_entry *iface_stat_procdir; + +/* + * For now we only track 2 sets of counters. + * The default set is 0. + * Userspace can activate another set for a given uid being tracked. + */ +#define IFS_MAX_COUNTER_SETS 2 +  enum ifs_tx_rx {  	IFS_TX,  	IFS_RX, @@ -126,18 +166,23 @@ struct byte_packet_counters {  };  struct data_counters { -	struct byte_packet_counters bpc[IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; +	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];  }; -struct tag_stat { +/* Generic tag based node used as a base for rb_tree ops. */ +struct tag_node {  	struct rb_node node;  	tag_t tag; +}; +struct tag_stat { +	struct tag_node tn;  	struct data_counters counters; -	/* If this tag is acct_tag based, we need to count against the -	 * matching parent uid_tag. */ +	/* +	 * If this tag is acct_tag based, we need to count against the +	 * matching parent uid_tag. +	 */  	struct data_counters *parent_counters; -	struct proc_dir_entry *proc_ptr;  };  struct iface_stat { @@ -157,20 +202,60 @@ struct iface_stat {  static LIST_HEAD(iface_stat_list);  static DEFINE_SPINLOCK(iface_stat_list_lock); +/* This is needed to create proc_dir_entries from atomic context. */ +struct iface_stat_work { +	struct work_struct iface_work; +	struct iface_stat *iface_entry; +}; +  /* - * Track tag that this socket is transferring data for, and not necesseraly + * Track tag that this socket is transferring data for, and not necessarily   * the uid that owns the socket.   * This is the tag against which tag_stat.counters will be billed.   */  struct sock_tag { -	struct rb_node node; -	struct sock *sk; +	struct rb_node sock_node; +	struct sock *sk;  /* Only used as a number, never dereferenced */ +	/* The socket is needed for sockfd_put() */ +	struct socket *socket; +  	tag_t tag;  }; +struct qtaguid_event_counts { +	/* Various successful events */ +	atomic64_t sockets_tagged; +	atomic64_t sockets_untagged; +	atomic64_t counter_set_changes; +	atomic64_t delete_cmds; +	atomic64_t iface_events;  /* Number of NETDEV_* events handled */ +	/* +	 * match_found_sk_*: numbers related to the netfilter matching +	 * function finding a sock for the sk_buff. +	 */ +	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */ +	/* The connection tracker had the sk. */ +	atomic64_t match_found_sk_in_ct; +	/* +	 * No sk could be found. No apparent owner. Could happen with +	 * unsolicited traffic. +	 */ +	atomic64_t match_found_sk_none; +}; +static struct qtaguid_event_counts qtu_events; +  static struct rb_root sock_tag_tree = RB_ROOT;  static DEFINE_SPINLOCK(sock_tag_list_lock); +/* Track the set active_set for the given tag. */ +struct tag_counter_set { +	struct tag_node tn; +	int active_set; +}; + +static struct rb_root tag_counter_set_tree = RB_ROOT; +static DEFINE_SPINLOCK(tag_counter_set_list_lock); +  static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);  /*----------------------------------------------*/ @@ -179,7 +264,6 @@ static inline int tag_compare(tag_t t1, tag_t t2)  	return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;  } -  static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)  {  	return acct_tag | uid; @@ -206,40 +290,42 @@ static inline bool valid_atag(tag_t tag)  	return !(tag & 0xFFFFFFFFULL);  } -static inline void dc_add_byte_packets(struct data_counters *counters, +static inline void dc_add_byte_packets(struct data_counters *counters, int set,  				  enum ifs_tx_rx direction,  				  enum ifs_proto ifs_proto,  				  int bytes,  				  int packets)  { -	counters->bpc[direction][ifs_proto].bytes += bytes; -	counters->bpc[direction][ifs_proto].packets += packets; +	counters->bpc[set][direction][ifs_proto].bytes += bytes; +	counters->bpc[set][direction][ifs_proto].packets += packets;  }  static inline uint64_t dc_sum_bytes(struct data_counters *counters, +				    int set,  				    enum ifs_tx_rx direction)  { -	return counters->bpc[direction][IFS_TCP].bytes -		+ counters->bpc[direction][IFS_UDP].bytes -		+ counters->bpc[direction][IFS_PROTO_OTHER].bytes; +	return counters->bpc[set][direction][IFS_TCP].bytes +		+ counters->bpc[set][direction][IFS_UDP].bytes +		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;  }  static inline uint64_t dc_sum_packets(struct data_counters *counters, +				      int set,  				      enum ifs_tx_rx direction)  { -	return counters->bpc[direction][IFS_TCP].packets -		+ counters->bpc[direction][IFS_UDP].packets -		+ counters->bpc[direction][IFS_PROTO_OTHER].packets; +	return counters->bpc[set][direction][IFS_TCP].packets +		+ counters->bpc[set][direction][IFS_UDP].packets +		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;  } -static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)  {  	struct rb_node *node = root->rb_node;  	while (node) { -		struct tag_stat *data = rb_entry(node, struct tag_stat, node); +		struct tag_node *data = rb_entry(node, struct tag_node, node);  		int result = tag_compare(tag, data->tag); -		pr_debug("qtaguid: tag_stat_tree_search(): tag=0x%llx" +		RB_DEBUG("qtaguid: tag_node_tree_search(): tag=0x%llx"  			 " (uid=%d)\n",  			 data->tag,  			 get_uid_from_tag(data->tag)); @@ -254,16 +340,16 @@ static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)  	return NULL;  } -static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)  {  	struct rb_node **new = &(root->rb_node), *parent = NULL;  	/* Figure out where to put new node */  	while (*new) { -		struct tag_stat *this = rb_entry(*new, struct tag_stat, +		struct tag_node *this = rb_entry(*new, struct tag_node,  						 node);  		int result = tag_compare(data->tag, this->tag); -		pr_debug("qtaguid: tag_stat_tree_insert(): tag=0x%llx" +		RB_DEBUG("qtaguid: tag_node_tree_insert(): tag=0x%llx"  			 " (uid=%d)\n",  			 this->tag,  			 get_uid_from_tag(this->tag)); @@ -281,13 +367,43 @@ static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)  	rb_insert_color(&data->node, root);  } +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +{ +	tag_node_tree_insert(&data->tn, root); +} + +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +{ +	struct tag_node *node = tag_node_tree_search(root, tag); +	if (!node) +		return NULL; +	return rb_entry(&node->node, struct tag_stat, tn.node); +} + +static void tag_counter_set_tree_insert(struct tag_counter_set *data, +					struct rb_root *root) +{ +	tag_node_tree_insert(&data->tn, root); +} + +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, +							   tag_t tag) +{ +	struct tag_node *node = tag_node_tree_search(root, tag); +	if (!node) +		return NULL; +	return rb_entry(&node->node, struct tag_counter_set, tn.node); + +} +  static struct sock_tag *sock_tag_tree_search(struct rb_root *root,  					     const struct sock *sk)  {  	struct rb_node *node = root->rb_node;  	while (node) { -		struct sock_tag *data = rb_entry(node, struct sock_tag, node); +		struct sock_tag *data = rb_entry(node, struct sock_tag, +						 sock_node);  		ptrdiff_t result = sk - data->sk;  		if (result < 0)  			node = node->rb_left; @@ -305,7 +421,8 @@ static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)  	/* Figure out where to put new node */  	while (*new) { -		struct sock_tag *this = rb_entry(*new, struct sock_tag, node); +		struct sock_tag *this = rb_entry(*new, struct sock_tag, +						 sock_node);  		ptrdiff_t result = data->sk - this->sk;  		parent = *new;  		if (result < 0) @@ -317,8 +434,8 @@ static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)  	}  	/* Add new node and rebalance tree. */ -	rb_link_node(&data->node, parent, new); -	rb_insert_color(&data->node, root); +	rb_link_node(&data->sock_node, parent, new); +	rb_insert_color(&data->sock_node, root);  }  static int read_proc_u64(char *page, char **start, off_t off, @@ -328,6 +445,7 @@ static int read_proc_u64(char *page, char **start, off_t off,  	uint64_t value;  	char *p = page;  	uint64_t *iface_entry = data; +  	if (!data)  		return 0; @@ -346,6 +464,7 @@ static int read_proc_bool(char *page, char **start, off_t off,  	bool value;  	char *p = page;  	bool *bool_entry = data; +  	if (!data)  		return 0; @@ -357,265 +476,387 @@ static int read_proc_bool(char *page, char **start, off_t off,  	return len;  } -/* Find the entry for tracking the specified interface. */ -static struct iface_stat *get_iface_stat(const char *ifname) +static int get_active_counter_set(tag_t tag) +{ +	int active_set = 0; +	struct tag_counter_set *tcs; + +	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" +		 " (uid=%u)\n", +		 tag, get_uid_from_tag(tag)); +	/* For now we only handle UID tags for active sets */ +	tag = get_utag_from_tag(tag); +	spin_lock_bh(&tag_counter_set_list_lock); +	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (tcs) +		active_set = tcs->active_set; +	spin_unlock_bh(&tag_counter_set_list_lock); +	return active_set; +} + +/* + * Find the entry for tracking the specified interface. + * Caller must hold iface_stat_list_lock + */ +static struct iface_stat *get_iface_entry(const char *ifname)  { -	unsigned long flags;  	struct iface_stat *iface_entry; -	if (!ifname) + +	/* Find the entry for tracking the specified tag within the interface */ +	if (ifname == NULL) { +		pr_info("qtaguid: iface_stat: get() NULL device name\n");  		return NULL; +	} -	spin_lock_irqsave(&iface_stat_list_lock, flags); +	/* Iterate over interfaces */  	list_for_each_entry(iface_entry, &iface_stat_list, list) { -		if (!strcmp(iface_entry->ifname, ifname)) +		if (!strcmp(ifname, iface_entry->ifname))  			goto done;  	}  	iface_entry = NULL;  done: -	spin_unlock_irqrestore(&iface_stat_list_lock, flags);  	return iface_entry;  } +static void iface_create_proc_worker(struct work_struct *work) +{ +	struct proc_dir_entry *proc_entry; +	struct iface_stat_work *isw = container_of(work, struct iface_stat_work, +						   iface_work); +	struct iface_stat *new_iface  = isw->iface_entry; + +	/* iface_entries are not deleted, so safe to manipulate. */ +	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); +	if (IS_ERR_OR_NULL(proc_entry)) { +		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); +		kfree(isw); +		return; +	} + +	new_iface->proc_ptr = proc_entry; + +	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, +			read_proc_u64, &new_iface->tx_bytes); +	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, +			read_proc_u64, &new_iface->rx_bytes); +	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, +			read_proc_u64, &new_iface->tx_packets); +	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, +			read_proc_u64, &new_iface->rx_packets); +	create_proc_read_entry("active", proc_iface_perms, proc_entry, +			read_proc_bool, &new_iface->active); + +	IF_DEBUG("qtaguid: iface_stat: create_proc(): done " +		 "entry=%p dev=%s\n", new_iface, new_iface->ifname); +	kfree(isw); +} + +/* Caller must hold iface_stat_list_lock */ +static struct iface_stat *iface_alloc(const char *ifname) +{ +	struct iface_stat *new_iface; +	struct iface_stat_work *isw; + +	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); +	if (new_iface == NULL) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "iface_stat alloc failed\n", ifname); +		return NULL; +	} +	new_iface->ifname = kstrdup(ifname, GFP_ATOMIC); +	if (new_iface->ifname == NULL) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "ifname alloc failed\n", ifname); +		kfree(new_iface); +		return NULL; +	} +	spin_lock_init(&new_iface->tag_stat_list_lock); +	new_iface->active = true; +	new_iface->tag_stat_tree = RB_ROOT; + +	/* +	 * ipv6 notifier chains are atomic :( +	 * No create_proc_read_entry() for you! +	 */ +	isw = kmalloc(sizeof(*isw), GFP_ATOMIC); +	if (!isw) { +		pr_err("qtaguid: iface_stat: create(%s): " +		       "work alloc failed\n", new_iface->ifname); +		kfree(new_iface->ifname); +		kfree(new_iface); +		return NULL; +	} +	isw->iface_entry = new_iface; +	INIT_WORK(&isw->iface_work, iface_create_proc_worker); +	schedule_work(&isw->iface_work); +	list_add(&new_iface->list, &iface_stat_list); +	return new_iface; +} +  /*   * Create a new entry for tracking the specified interface.   * Do nothing if the entry already exists.   * Called when an interface is configured with a valid IP address.   */ -void iface_stat_create(const struct net_device *net_dev) +void iface_stat_create(const struct net_device *net_dev, +		       struct in_ifaddr *ifa)  { -	struct in_device *in_dev; -	unsigned long flags; -	struct iface_stat *new_iface; -	struct proc_dir_entry *proc_entry; +	struct in_device *in_dev = NULL;  	const char *ifname;  	struct iface_stat *entry;  	__be32 ipaddr = 0; -	struct in_ifaddr *ifa = NULL; - -	ASSERT_RTNL(); /* No need for separate locking */ +	struct iface_stat *new_iface; -	pr_debug("iface_stat: create(): netdev=%p->name=%s\n", -		 net_dev, net_dev ? net_dev->name : ""); +	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", +		 net_dev ? net_dev->name : "?", +		 ifa, net_dev);  	if (!net_dev) { -		pr_err("iface_stat: create(): no net dev!\n"); +		pr_err("qtaguid: iface_stat: create(): no net dev\n");  		return;  	} -	in_dev = __in_dev_get_rtnl(net_dev); -	if (!in_dev) { -		pr_err("iface_stat: create(): no inet dev!\n"); -		return; +	ifname = net_dev->name; +	if (!ifa) { +		in_dev = in_dev_get(net_dev); +		if (!in_dev) { +			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", +			       ifname); +			return; +		} +		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", +			 ifname, in_dev); +		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { +			IF_DEBUG("qtaguid: iface_stat: create(%s): " +				 "ifa=%p ifa_label=%s\n", +				 ifname, ifa, +				 ifa->ifa_label ? ifa->ifa_label : "(null)"); +			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) +				break; +		}  	} -	pr_debug("iface_stat: create(): in_dev=%p\n", in_dev); -	ifname = net_dev->name; -	pr_debug("iface_stat: create(): ifname=%p\n", ifname); -	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { -		pr_debug("iface_stat: create(): for(): ifa=%p ifname=%p\n", -			 ifa, ifname); -		pr_debug("iface_stat: create(): ifname=%s ifa_label=%s\n", -			 ifname, ifa->ifa_label ? ifa->ifa_label : "(null)"); -		if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) -			break; -	} - -	if (ifa) { -		ipaddr = ifa->ifa_local; -	} else { -		pr_err("iface_stat: create(): dev %s has no matching IP\n", -		       ifname); -		return; +	if (!ifa) { +		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", +			 ifname); +		goto done_put;  	} +	ipaddr = ifa->ifa_local; -	entry = get_iface_stat(net_dev->name); +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(ifname);  	if (entry != NULL) { -		pr_debug("iface_stat: create(): dev %s entry=%p\n", ifname, -			 entry); +		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", +			 ifname, entry);  		if (ipv4_is_loopback(ipaddr)) {  			entry->active = false; -			pr_debug("iface_stat: create(): disable tracking of " -				 "loopback dev %s\n", ifname); +			IF_DEBUG("qtaguid: iface_stat: create(%s): " +				 "disable tracking of loopback dev\n", +				 ifname);  		} else {  			entry->active = true; -			pr_debug("iface_stat: create(): enable tracking of " -				 "dev %s with ip=%pI4\n", +			IF_DEBUG("qtaguid: iface_stat: create(%s): " +				 "enable tracking. ip=%pI4\n",  				 ifname, &ipaddr);  		} -		return; +		goto done_unlock_put;  	} else if (ipv4_is_loopback(ipaddr)) { -		pr_debug("iface_stat: create(): ignore loopback dev %s" -			 " ip=%pI4\n", ifname, &ipaddr); -		return; +		IF_DEBUG("qtaguid: iface_stat: create(%s): " +			 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); +		goto done_unlock_put;  	} -	new_iface = kzalloc(sizeof(*new_iface), GFP_KERNEL); -	if (new_iface == NULL) { -		pr_err("iface_stat: create(): failed to alloc iface_stat\n"); +	new_iface = iface_alloc(ifname); +	IF_DEBUG("qtaguid: iface_stat: create(%s): done " +		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); + +done_unlock_put: +	spin_unlock_bh(&iface_stat_list_lock); +done_put: +	if (in_dev) +		in_dev_put(in_dev); +} + +void iface_stat_create_ipv6(const struct net_device *net_dev, +			    struct inet6_ifaddr *ifa) +{ +	struct in_device *in_dev; +	const char *ifname; +	struct iface_stat *entry; +	struct iface_stat *new_iface; +	int addr_type; + +	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", +		 ifa, net_dev, net_dev ? net_dev->name : ""); +	if (!net_dev) { +		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");  		return;  	} -	new_iface->ifname = kstrdup(ifname, GFP_KERNEL); -	if (new_iface->ifname == NULL) { -		pr_err("iface_stat: create(): failed to alloc ifname\n"); -		kfree(new_iface); +	ifname = net_dev->name; + +	in_dev = in_dev_get(net_dev); +	if (!in_dev) { +		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", +		       ifname);  		return;  	} -	spin_lock_init(&new_iface->tag_stat_list_lock); -	new_iface->active = true; +	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", +		 ifname, in_dev); -	new_iface->tag_stat_tree = RB_ROOT; -	spin_lock_irqsave(&iface_stat_list_lock, flags); -	list_add(&new_iface->list, &iface_stat_list); -	spin_unlock_irqrestore(&iface_stat_list_lock, flags); +	if (!ifa) { +		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", +			 ifname); +		goto done_put; +	} +	addr_type = ipv6_addr_type(&ifa->addr); -	proc_entry = proc_mkdir(ifname, iface_stat_procdir); -	new_iface->proc_ptr = proc_entry; +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(ifname); +	if (entry != NULL) { +		IF_DEBUG("qtaguid: iface_stat: create6(%s): entry=%p\n", +			 ifname, entry); +		if (addr_type & IPV6_ADDR_LOOPBACK) { +			entry->active = false; +			IF_DEBUG("qtaguid: iface_stat: create6(%s): " +				 "disable tracking of loopback dev\n", +				 ifname); +		} else { +			entry->active = true; +			IF_DEBUG("qtaguid: iface_stat: create6(%s): " +				 "enable tracking. ip=%pI6c\n", +				 ifname, &ifa->addr); +		} +		goto done_unlock_put; +	} else if (addr_type & IPV6_ADDR_LOOPBACK) { +		IF_DEBUG("qtaguid: iface_stat: create6(%s): " +			 "ignore loopback dev. ip=%pI6c\n", +			 ifname, &ifa->addr); +		goto done_unlock_put; +	} -	/* TODO: make root access only */ -	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, -			read_proc_u64, &new_iface->tx_bytes); -	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, -			read_proc_u64, &new_iface->rx_bytes); -	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, -			read_proc_u64, &new_iface->tx_packets); -	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, -			read_proc_u64, &new_iface->rx_packets); -	create_proc_read_entry("active", proc_iface_perms, proc_entry, -			read_proc_bool, &new_iface->active); +	new_iface = iface_alloc(ifname); +	IF_DEBUG("qtaguid: iface_stat: create6(%s): done " +		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); -	pr_debug("iface_stat: create(): done entry=%p dev=%s ip=%pI4\n", -		 new_iface, ifname, &ipaddr); +done_unlock_put: +	spin_unlock_bh(&iface_stat_list_lock); +done_put: +	in_dev_put(in_dev);  }  static struct sock_tag *get_sock_stat_nl(const struct sock *sk)  { -	pr_debug("xt_qtaguid: get_sock_stat_nl(sk=%p)\n", sk); +	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);  	return sock_tag_tree_search(&sock_tag_tree, sk);  }  static struct sock_tag *get_sock_stat(const struct sock *sk)  { -	unsigned long flags;  	struct sock_tag *sock_tag_entry; -	pr_debug("xt_qtaguid: get_sock_stat(sk=%p)\n", sk); +	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);  	if (!sk)  		return NULL; -	spin_lock_irqsave(&sock_tag_list_lock, flags); +	spin_lock_bh(&sock_tag_list_lock);  	sock_tag_entry = get_sock_stat_nl(sk); -	spin_unlock_irqrestore(&sock_tag_list_lock, flags); +	spin_unlock_bh(&sock_tag_list_lock);  	return sock_tag_entry;  }  static void -data_counters_update(struct data_counters *dc,  enum ifs_tx_rx direction, -		int proto, int bytes) +data_counters_update(struct data_counters *dc, int set, +		     enum ifs_tx_rx direction, int proto, int bytes)  {  	switch (proto) {  	case IPPROTO_TCP: -		dc_add_byte_packets(dc, direction, IFS_TCP, bytes, 1); +		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);  		break;  	case IPPROTO_UDP: -		dc_add_byte_packets(dc, direction, IFS_UDP, bytes, 1); +		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);  		break;  	case IPPROTO_IP:  	default: -		dc_add_byte_packets(dc, direction, IFS_PROTO_OTHER, bytes, 1); +		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, +				    1);  		break;  	}  } -  /*   * Update stats for the specified interface. Do nothing if the entry   * does not exist (when a device was never configured with an IP address).   * Called when an device is being unregistered.   */ -void iface_stat_update(struct net_device *dev) +static void iface_stat_update(struct net_device *dev)  {  	struct rtnl_link_stats64 dev_stats, *stats;  	struct iface_stat *entry; -	stats = dev_get_stats(dev, &dev_stats); -	ASSERT_RTNL(); -	entry = get_iface_stat(dev->name); +	stats = dev_get_stats(dev, &dev_stats); +	spin_lock_bh(&iface_stat_list_lock); +	entry = get_iface_entry(dev->name);  	if (entry == NULL) { -		pr_debug("iface_stat: dev %s monitor not found\n", dev->name); +		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", +			 dev->name); +		spin_unlock_bh(&iface_stat_list_lock);  		return;  	} +	IF_DEBUG("qtaguid: iface_stat: update(%s): entry=%p\n", +		 dev->name, entry);  	if (entry->active) {  		entry->tx_bytes += stats->tx_bytes;  		entry->tx_packets += stats->tx_packets;  		entry->rx_bytes += stats->rx_bytes;  		entry->rx_packets += stats->rx_packets;  		entry->active = false; -		pr_debug("iface_stat: Updating stats for " -			"dev %s which went down\n", dev->name); +		IF_DEBUG("qtaguid: iface_stat: update(%s): " +			 " disable tracking. rx/tx=%llu/%llu\n", +			 dev->name, stats->rx_bytes, stats->tx_bytes);  	} else { -		pr_debug("iface_stat: Did not update stats for " -			"dev %s which went down\n", dev->name); +		IF_DEBUG("qtaguid: iface_stat: update(%s): disabled\n", +			dev->name);  	} +	spin_unlock_bh(&iface_stat_list_lock);  } -  static void tag_stat_update(struct tag_stat *tag_entry,  			enum ifs_tx_rx direction, int proto, int bytes)  { -	pr_debug("xt_qtaguid: tag_stat_update(tag=0x%llx (uid=%d) dir=%d " -		"proto=%d bytes=%d)\n", -		tag_entry->tag, get_uid_from_tag(tag_entry->tag), direction, -		proto, bytes); -	data_counters_update(&tag_entry->counters, direction, proto, bytes); +	int active_set; +	active_set = get_active_counter_set(tag_entry->tn.tag); +	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " +		 "dir=%d proto=%d bytes=%d)\n", +		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), +		 active_set, direction, proto, bytes); +	data_counters_update(&tag_entry->counters, active_set, direction, +			     proto, bytes);  	if (tag_entry->parent_counters) -		data_counters_update(tag_entry->parent_counters, direction, -				proto, bytes); +		data_counters_update(tag_entry->parent_counters, active_set, +				     direction, proto, bytes);  } - -/* Create a new entry for tracking the specified {acct_tag,uid_tag} within +/* + * Create a new entry for tracking the specified {acct_tag,uid_tag} within   * the interface. - * iface_entry->tag_stat_list_lock should be held. */ + * iface_entry->tag_stat_list_lock should be held. + */  static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,  					   tag_t tag)  {  	struct tag_stat *new_tag_stat_entry = NULL; -	pr_debug("iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx" -		 " (uid=%d)\n", +	IF_DEBUG("qtaguid: iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx" +		 " (uid=%u)\n",  		 iface_entry, tag, get_uid_from_tag(tag));  	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);  	if (!new_tag_stat_entry) { -		pr_err("iface_stat: failed to alloc new tag entry\n"); +		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");  		goto done;  	} -	new_tag_stat_entry->tag = tag; +	new_tag_stat_entry->tn.tag = tag;  	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);  done:  	return new_tag_stat_entry;  } -static struct iface_stat *get_iface_entry(const char *ifname) -{ -	struct iface_stat *iface_entry; -	unsigned long flags; - -	/* Find the entry for tracking the specified tag within the interface */ -	if (ifname == NULL) { -		pr_info("iface_stat: NULL device name\n"); -		return NULL; -	} - - -	/* Iterate over interfaces */ -	spin_lock_irqsave(&iface_stat_list_lock, flags); -	list_for_each_entry(iface_entry, &iface_stat_list, list) { -		if (!strcmp(ifname, iface_entry->ifname)) -			goto done; -	} -	iface_entry = NULL; -done: -	spin_unlock_irqrestore(&iface_stat_list_lock, flags); -	return iface_entry; -} -  static void if_tag_stat_update(const char *ifname, uid_t uid,  			       const struct sock *sk, enum ifs_tx_rx direction,  			       int proto, int bytes) @@ -626,25 +867,27 @@ static void if_tag_stat_update(const char *ifname, uid_t uid,  	struct data_counters *uid_tag_counters;  	struct sock_tag *sock_tag_entry;  	struct iface_stat *iface_entry; -	unsigned long flags;  	struct tag_stat *new_tag_stat; -	pr_debug("xt_qtaguid: if_tag_stat_update(ifname=%s " -		"uid=%d sk=%p dir=%d proto=%d bytes=%d)\n", +	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " +		"uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",  		 ifname, uid, sk, direction, proto, bytes);  	iface_entry = get_iface_entry(ifname);  	if (!iface_entry) { -		pr_err("iface_stat: interface %s not found\n", ifname); +		pr_err("qtaguid: iface_stat: stat_update() %s not found\n", +		       ifname);  		return;  	} -	/* else { If the iface_entry becomes inactive, it is still ok -	 * to process the data. } */ +	/* It is ok to process data when an iface_entry is inactive */ -	pr_debug("iface_stat: stat_update() got entry=%p\n", iface_entry); +	MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", +		 ifname, iface_entry); -	/* Look for a tagged sock. -	 * It will have an acct_uid. */ +	/* +	 * Look for a tagged sock. +	 * It will have an acct_uid. +	 */  	sock_tag_entry = get_sock_stat(sk);  	if (sock_tag_entry) {  		tag = sock_tag_entry->tag; @@ -655,19 +898,21 @@ static void if_tag_stat_update(const char *ifname, uid_t uid,  		acct_tag = 0;  		tag = combine_atag_with_uid(acct_tag, uid);  	} -	pr_debug("iface_stat: stat_update(): looking for tag=0x%llx (uid=%d)" -		 " in ife=%p\n", +	MT_DEBUG("qtaguid: iface_stat: stat_update(): " +		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",  		 tag, get_uid_from_tag(tag), iface_entry);  	/* Loop over tag list under this interface for {acct_tag,uid_tag} */ -	spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags); +	spin_lock_bh(&iface_entry->tag_stat_list_lock);  	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,  					      tag);  	if (tag_stat_entry) { -		/* Updating the {acct_tag, uid_tag} entry handles both stats: -		 * {0, uid_tag} will also get updated. */ +		/* +		 * Updating the {acct_tag, uid_tag} entry handles both stats: +		 * {0, uid_tag} will also get updated. +		 */  		tag_stat_update(tag_stat_entry, direction, proto, bytes); -		spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags); +		spin_unlock_bh(&iface_entry->tag_stat_list_lock);  		return;  	} @@ -690,7 +935,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid,  		new_tag_stat = create_if_tag_stat(iface_entry, tag);  		new_tag_stat->parent_counters = uid_tag_counters;  	} -	spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags); +	spin_unlock_bh(&iface_entry->tag_stat_list_lock);  	tag_stat_update(new_tag_stat, direction, proto, bytes);  } @@ -701,42 +946,76 @@ static int iface_netdev_event_handler(struct notifier_block *nb,  	if (unlikely(module_passive))  		return NOTIFY_DONE; -	pr_debug("iface_stat: netdev_event(): ev=0x%lx netdev=%p->name=%s\n", +	IF_DEBUG("qtaguid: iface_stat: netdev_event(): " +		 "ev=0x%lx netdev=%p->name=%s\n",  		 event, dev, dev ? dev->name : "");  	switch (event) {  	case NETDEV_UP: -	case NETDEV_REBOOT: -	case NETDEV_CHANGE: -	case NETDEV_REGISTER:  /* Most likely no IP */ -	case NETDEV_CHANGEADDR:  /* MAC addr change */ -	case NETDEV_CHANGENAME: -	case NETDEV_FEAT_CHANGE:  /* Might be usefull when cell type changes */ -		iface_stat_create(dev); +		iface_stat_create(dev, NULL);  		break; -	case NETDEV_UNREGISTER: +	case NETDEV_DOWN:  		iface_stat_update(dev);  		break;  	}  	return NOTIFY_DONE;  } -static int iface_inetaddr_event_handler(struct notifier_block *nb, -					unsigned long event, void *ptr) { +static int iface_inet6addr_event_handler(struct notifier_block *nb, +					 unsigned long event, void *ptr) +{ +	struct inet6_ifaddr *ifa = ptr; +	struct net_device *dev; + +	if (unlikely(module_passive)) +		return NOTIFY_DONE; + +	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " +		 "ev=0x%lx ifa=%p\n", +		 event, ifa); + +	switch (event) { +	case NETDEV_UP: +		BUG_ON(!ifa || !ifa->idev); +		dev = (struct net_device *)ifa->idev->dev; +		iface_stat_create_ipv6(dev, ifa); +		atomic64_inc(&qtu_events.iface_events); +		break; +	case NETDEV_DOWN: +		BUG_ON(!ifa || !ifa->idev); +		dev = (struct net_device *)ifa->idev->dev; +		iface_stat_update(dev); +		atomic64_inc(&qtu_events.iface_events); +		break; +	} +	return NOTIFY_DONE; +} +static int iface_inetaddr_event_handler(struct notifier_block *nb, +					unsigned long event, void *ptr) +{  	struct in_ifaddr *ifa = ptr; -	struct in_device *in_dev = ifa->ifa_dev; -	struct net_device *dev = in_dev->dev; +	struct net_device *dev;  	if (unlikely(module_passive))  		return NOTIFY_DONE; -	pr_debug("iface_stat: inetaddr_event(): ev=0x%lx netdev=%p->name=%s\n", -		 event, dev, dev ? dev->name : ""); +	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " +		 "ev=0x%lx ifa=%p\n", +		 event, ifa);  	switch (event) {  	case NETDEV_UP: -		iface_stat_create(dev); +		BUG_ON(!ifa || !ifa->ifa_dev); +		dev = ifa->ifa_dev->dev; +		iface_stat_create(dev, ifa); +		atomic64_inc(&qtu_events.iface_events); +		break; +	case NETDEV_DOWN: +		BUG_ON(!ifa || !ifa->ifa_dev); +		dev = ifa->ifa_dev->dev; +		iface_stat_update(dev); +		atomic64_inc(&qtu_events.iface_events);  		break;  	}  	return NOTIFY_DONE; @@ -750,28 +1029,43 @@ static struct notifier_block iface_inetaddr_notifier_blk = {  	.notifier_call = iface_inetaddr_event_handler,  }; +static struct notifier_block iface_inet6addr_notifier_blk = { +	.notifier_call = iface_inet6addr_event_handler, +}; +  static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)  {  	int err;  	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);  	if (!iface_stat_procdir) { -		pr_err("iface_stat: failed to create proc entry\n"); +		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");  		err = -1;  		goto err;  	}  	err = register_netdevice_notifier(&iface_netdev_notifier_blk);  	if (err) { -		pr_err("iface_stat: failed to register dev event handler\n"); -		goto err_unreg_nd; +		pr_err("qtaguid: iface_stat: init " +		       "failed to register dev event handler\n"); +		goto err_zap_entry;  	}  	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);  	if (err) { -		pr_err("iface_stat: failed to register dev event handler\n"); -		goto err_zap_entry; +		pr_err("qtaguid: iface_stat: init " +		       "failed to register ipv4 dev event handler\n"); +		goto err_unreg_nd; +	} + +	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); +	if (err) { +		pr_err("qtaguid: iface_stat: init " +		       "failed to register ipv6 dev event handler\n"); +		goto err_unreg_ip4_addr;  	}  	return 0; +err_unreg_ip4_addr: +	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);  err_unreg_nd:  	unregister_netdevice_notifier(&iface_netdev_notifier_blk);  err_zap_entry: @@ -786,11 +1080,13 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,  	struct sock *sk;  	unsigned int hook_mask = (1 << par->hooknum); -	pr_debug("xt_qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, +	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,  		 par->hooknum, par->family); -	/* Let's not abuse the the xt_socket_get*_sk(), or else it will -	 * return garbage SKs. */ +	/* +	 * Let's not abuse the the xt_socket_get*_sk(), or else it will +	 * return garbage SKs. +	 */  	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))  		return NULL; @@ -805,12 +1101,13 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,  		return NULL;  	} -	/* Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. +	/* +	 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.  	 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959  	 * Not fixed in 3.0-r3 :(  	 */  	if (sk) { -		pr_debug("xt_qtaguid: %p->sk_proto=%u " +		MT_DEBUG("qtaguid: %p->sk_proto=%u "  			 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);  		if (sk->sk_state  == TCP_TIME_WAIT) {  			xt_socket_put_sk(sk); @@ -827,14 +1124,14 @@ static void account_for_uid(const struct sk_buff *skb,  	const struct net_device *el_dev;  	if (!skb->dev) { -		pr_debug("xt_qtaguid[%d]: no skb->dev\n", par->hooknum); +		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);  		el_dev = par->in ? : par->out;  	} else {  		const struct net_device *other_dev;  		el_dev = skb->dev;  		other_dev = par->in ? : par->out;  		if (el_dev != other_dev) { -			pr_debug("xt_qtaguid[%d]: skb->dev=%p %s vs " +			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "  				"par->(in/out)=%p %s\n",  				par->hooknum, el_dev, el_dev->name, other_dev,  				other_dev->name); @@ -842,14 +1139,14 @@ static void account_for_uid(const struct sk_buff *skb,  	}  	if (unlikely(!el_dev)) { -		pr_info("xt_qtaguid[%d]: no par->in/out?!!\n", par->hooknum); +		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);  	} else if (unlikely(!el_dev->name)) { -		pr_info("xt_qtaguid[%d]: no dev->name?!!\n", par->hooknum); +		pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);  	} else { -		pr_debug("xt_qtaguid[%d]: dev name=%s type=%d\n", -			par->hooknum, -			el_dev->name, -			el_dev->type); +		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", +			 par->hooknum, +			 el_dev->name, +			 el_dev->type);  		if_tag_stat_update(el_dev->name, uid,  				skb->sk ? skb->sk : alternate_sk, @@ -867,7 +1164,10 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)  	uid_t sock_uid;  	bool res; -	pr_debug("xt_qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", +	if (unlikely(module_passive)) +		return (info->match ^ info->invert) == 0; + +	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",  		 par->hooknum, skb, par->in, par->out, par->family);  	if (skb == NULL) { @@ -878,38 +1178,50 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)  	sk = skb->sk;  	if (sk == NULL) { -		/*  A missing sk->sk_socket happens when packets are in-flight +		/* +		 * A missing sk->sk_socket happens when packets are in-flight  		 * and the matching socket is already closed and gone.  		 */  		sk = qtaguid_find_sk(skb, par); -		/* If we got the socket from the find_sk(), we will need to put -		 * it back, as nf_tproxy_get_sock_v4() got it. */ +		/* +		 * If we got the socket from the find_sk(), we will need to put +		 * it back, as nf_tproxy_get_sock_v4() got it. +		 */  		got_sock = sk; +		if (sk) +			atomic64_inc(&qtu_events.match_found_sk_in_ct); +	} else { +		atomic64_inc(&qtu_events.match_found_sk);  	} -	pr_debug("xt_qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", +	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",  		par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);  	if (sk != NULL) { -		pr_debug("xt_qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", +		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",  			par->hooknum, sk, sk->sk_socket,  			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);  		filp = sk->sk_socket ? sk->sk_socket->file : NULL; -		pr_debug("xt_qtaguid[%d]: filp...uid=%d\n", +		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",  			par->hooknum, filp ? filp->f_cred->fsuid : -1);  	}  	if (sk == NULL || sk->sk_socket == NULL) { -		/* Here, the qtaguid_find_sk() using connection tracking +		/* +		 * Here, the qtaguid_find_sk() using connection tracking  		 * couldn't find the owner, so for now we just count them -		 * against the system. */ -		/* TODO: unhack how to force just accounting. +		 * against the system. +		 */ +		/* +		 * TODO: unhack how to force just accounting.  		 * For now we only do iface stats when the uid-owner is not -		 * requested */ +		 * requested. +		 */  		if (!(info->match & XT_QTAGUID_UID))  			account_for_uid(skb, sk, 0, par); -		pr_debug("xt_qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", +		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",  			par->hooknum,  			sk ? sk->sk_socket : NULL);  		res = (info->match ^ info->invert) == 0; +		atomic64_inc(&qtu_events.match_found_sk_none);  		goto put_sock_ret_res;  	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {  		res = false; @@ -917,18 +1229,21 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)  	}  	filp = sk->sk_socket->file;  	if (filp == NULL) { -		pr_debug("xt_qtaguid[%d]: leaving filp=NULL\n", par->hooknum); +		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);  		res = ((info->match ^ info->invert) &  			(XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;  		goto put_sock_ret_res;  	}  	sock_uid = filp->f_cred->fsuid; -	/* TODO: unhack how to force just accounting. -	 * For now we only do iface stats when the uid-owner is not requested */ +	/* +	 * TODO: unhack how to force just accounting. +	 * For now we only do iface stats when the uid-owner is not requested +	 */  	if (!(info->match & XT_QTAGUID_UID))  		account_for_uid(skb, sk, sock_uid, par); -	/* The following two tests fail the match when: +	/* +	 * The following two tests fail the match when:  	 *    id not in range AND no inverted condition requested  	 * or id     in range AND    inverted condition requested  	 * Thus (!a && b) || (a && !b) == a ^ b @@ -937,7 +1252,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)  		if ((filp->f_cred->fsuid >= info->uid_min &&  		     filp->f_cred->fsuid <= info->uid_max) ^  		    !(info->invert & XT_QTAGUID_UID)) { -			pr_debug("xt_qtaguid[%d]: leaving uid not matching\n", +			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",  				 par->hooknum);  			res = false;  			goto put_sock_ret_res; @@ -946,20 +1261,20 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)  		if ((filp->f_cred->fsgid >= info->gid_min &&  				filp->f_cred->fsgid <= info->gid_max) ^  			!(info->invert & XT_QTAGUID_GID)) { -			pr_debug("xt_qtaguid[%d]: leaving gid not matching\n", +			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",  				par->hooknum);  			res = false;  			goto put_sock_ret_res;  		} -	pr_debug("xt_qtaguid[%d]: leaving matched\n", par->hooknum); +	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);  	res = true;  put_sock_ret_res:  	if (got_sock)  		xt_socket_put_sk(sk);  ret_res: -	pr_debug("xt_qtaguid[%d]: left %d\n", par->hooknum, res); +	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);  	return res;  } @@ -973,7 +1288,6 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,  {  	char *outp = page;  	int len; -	unsigned long flags;  	uid_t uid;  	struct sock_tag *sock_tag_entry;  	struct rb_node *node; @@ -984,29 +1298,59 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,  		return 0;  	} -	pr_debug("xt_qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", +	/* TODO: support skipping num_items_returned on entry. */ +	CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",  		page, items_to_skip, char_count, *eof);  	if (*eof)  		return 0; -	spin_lock_irqsave(&sock_tag_list_lock, flags); +	spin_lock_bh(&sock_tag_list_lock);  	for (node = rb_first(&sock_tag_tree);  	     node;  	     node = rb_next(node)) {  		if (item_index++ < items_to_skip)  			continue; -		sock_tag_entry = rb_entry(node, struct sock_tag, node); +		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);  		uid = get_uid_from_tag(sock_tag_entry->tag); -		pr_debug("xt_qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%d)\n", -			sock_tag_entry->sk, -			sock_tag_entry->tag, -			uid); +		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u)\n", +			 sock_tag_entry->sk, +			 sock_tag_entry->tag, +			 uid +			);  		len = snprintf(outp, char_count,  			       "sock=%p tag=0x%llx (uid=%u)\n",  			       sock_tag_entry->sk, sock_tag_entry->tag, uid);  		if (len >= char_count) { -			spin_unlock_irqrestore(&sock_tag_list_lock, flags); +			spin_unlock_bh(&sock_tag_list_lock); +			*outp = '\0'; +			return outp - page; +		} +		outp += len; +		char_count -= len; +		(*num_items_returned)++; +	} +	spin_unlock_bh(&sock_tag_list_lock); + +	if (item_index++ >= items_to_skip) { +		len = snprintf(outp, char_count, +			       "events: sockets_tagged=%llu " +			       "sockets_untagged=%llu " +			       "counter_set_changes=%llu " +			       "delete_cmds=%llu " +			       "iface_events=%llu " +			       "match_found_sk=%llu " +			       "match_found_sk_in_ct=%llu " +			       "match_found_sk_none=%llu\n", +			       atomic64_read(&qtu_events.sockets_tagged), +			       atomic64_read(&qtu_events.sockets_untagged), +			       atomic64_read(&qtu_events.counter_set_changes), +			       atomic64_read(&qtu_events.delete_cmds), +			       atomic64_read(&qtu_events.iface_events), +			       atomic64_read(&qtu_events.match_found_sk), +			       atomic64_read(&qtu_events.match_found_sk_in_ct), +			       atomic64_read(&qtu_events.match_found_sk_none)); +		if (len >= char_count) {  			*outp = '\0';  			return outp - page;  		} @@ -1014,124 +1358,215 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,  		char_count -= len;  		(*num_items_returned)++;  	} -	spin_unlock_irqrestore(&sock_tag_list_lock, flags); +  	*eof = 1;  	return outp - page;  } -int can_impersonate_uid(uid_t uid) +static bool can_manipulate_uids(void)  { -	return uid == current_fsuid() -		|| !proc_ctrl_write_gid +	/* root pwnd */ +	return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)  		|| in_egroup_p(proc_ctrl_write_gid);  } -int can_read_other_uid_stats(uid_t uid) +static bool can_impersonate_uid(uid_t uid)  { -	return uid == current_fsuid() -		|| !proc_ctrl_write_gid +	return uid == current_fsuid() || can_manipulate_uids(); +} + +static bool can_read_other_uid_stats(uid_t uid) +{ +	/* root pwnd */ +	return unlikely(!current_fsuid()) || uid == current_fsuid() +		|| unlikely(!proc_stats_readall_gid)  		|| in_egroup_p(proc_stats_readall_gid);  } -/* Delete socket tags, and stat tags associated with a given - * accouting tag and uid. */ +/* + * Delete socket tags, and stat tags associated with a given + * accouting tag and uid. + */  static int ctrl_cmd_delete(const char *input)  {  	char cmd; -	uid_t uid = 0; +	uid_t uid;  	uid_t entry_uid; -	tag_t acct_tag = 0; +	tag_t acct_tag;  	tag_t tag;  	int res, argc; -	unsigned long flags, flags2;  	struct iface_stat *iface_entry;  	struct rb_node *node;  	struct sock_tag *st_entry; +	struct rb_root st_to_free_tree = RB_ROOT;  	struct tag_stat *ts_entry; +	struct tag_counter_set *tcs_entry; -	pr_debug("xt_qtaguid: ctrl_delete(%s): entered\n", input);  	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); -	pr_debug("xt_qtaguid: ctrl_delete(%s): argc=%d cmd=%c " -		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, +	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " +		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,  		 acct_tag, uid);  	if (argc < 2) {  		res = -EINVAL;  		goto err;  	}  	if (!valid_atag(acct_tag)) { -		pr_info("xt_qtaguid: ctrl_delete(%s): invalid tag\n", input); +		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);  		res = -EINVAL;  		goto err;  	}  	if (argc < 3) {  		uid = current_fsuid();  	} else if (!can_impersonate_uid(uid)) { -		pr_info("xt_qtaguid: ctrl_delete(%s): insuficient priv\n", -			input); +		pr_info("qtaguid: ctrl_delete(%s): " +			"insufficient priv from pid=%u uid=%u\n", +			input, current->pid, current_fsuid());  		res = -EPERM;  		goto err;  	} -	spin_lock_irqsave(&sock_tag_list_lock, flags); +	/* Delete socket tags */ +	spin_lock_bh(&sock_tag_list_lock);  	node = rb_first(&sock_tag_tree);  	while (node) { -		st_entry = rb_entry(node, struct sock_tag, node); +		st_entry = rb_entry(node, struct sock_tag, sock_node);  		entry_uid = get_uid_from_tag(st_entry->tag);  		node = rb_next(node);  		if (entry_uid != uid)  			continue;  		if (!acct_tag || st_entry->tag == tag) { -			pr_debug("xt_qtaguid: ctrl_delete(): " -				 "erase sk=%p tag=0x%llx (uid=%d)\n", -				 st_entry->sk, -				 st_entry->tag, -				 entry_uid); -			rb_erase(&ts_entry->node, &sock_tag_tree); -			kfree(st_entry); +			rb_erase(&st_entry->sock_node, &sock_tag_tree); +			/* Can't sockfd_put() within spinlock, do it later. */ +			sock_tag_tree_insert(st_entry, &st_to_free_tree);  		}  	} -	spin_unlock_irqrestore(&sock_tag_list_lock, flags); +	spin_unlock_bh(&sock_tag_list_lock); + +	node = rb_first(&st_to_free_tree); +	while (node) { +		st_entry = rb_entry(node, struct sock_tag, sock_node); +		node = rb_next(node); +		CT_DEBUG("qtaguid: ctrl_delete(): " +			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", +			 st_entry->sk, +			 st_entry->tag, +			 entry_uid); +		rb_erase(&st_entry->sock_node, &st_to_free_tree); +		sockfd_put(st_entry->socket); +		kfree(st_entry); +	} -	/* If acct_tag is 0, then all entries belonging to uid are -	 * erased. */  	tag = combine_atag_with_uid(acct_tag, uid); -	spin_lock_irqsave(&iface_stat_list_lock, flags); -	list_for_each_entry(iface_entry, &iface_stat_list, list) { -		spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2); +	/* Delete tag counter-sets */ +	spin_lock_bh(&tag_counter_set_list_lock); +	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (tcs_entry) { +		CT_DEBUG("qtaguid: ctrl_delete(): " +			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n", +			 tcs_entry->tn.tag, +			 get_uid_from_tag(tcs_entry->tn.tag), +			 tcs_entry->active_set); +		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); +		kfree(tcs_entry); +	} +	spin_unlock_bh(&tag_counter_set_list_lock); + +	/* +	 * If acct_tag is 0, then all entries belonging to uid are +	 * erased. +	 */ +	spin_lock_bh(&iface_stat_list_lock); +	list_for_each_entry(iface_entry, &iface_stat_list, list) { +		spin_lock_bh(&iface_entry->tag_stat_list_lock);  		node = rb_first(&iface_entry->tag_stat_tree);  		while (node) { -			ts_entry = rb_entry(node, struct tag_stat, node); -			entry_uid = get_uid_from_tag(ts_entry->tag); +			ts_entry = rb_entry(node, struct tag_stat, tn.node); +			entry_uid = get_uid_from_tag(ts_entry->tn.tag);  			node = rb_next(node);  			if (entry_uid != uid)  				continue; -			if (!acct_tag || ts_entry->tag == tag) { -				pr_debug("xt_qtaguid: ctrl_delete(): erase " -					 "%s 0x%llx %u\n", +			if (!acct_tag || ts_entry->tn.tag == tag) { +				CT_DEBUG("qtaguid: ctrl_delete(): " +					 "erase ts: %s 0x%llx %u\n",  					 iface_entry->ifname, -					 get_atag_from_tag(ts_entry->tag), +					 get_atag_from_tag(ts_entry->tn.tag),  					 entry_uid); -				rb_erase(&ts_entry->node, +				rb_erase(&ts_entry->tn.node,  					 &iface_entry->tag_stat_tree);  				kfree(ts_entry);  			}  		} -		spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, -				       flags2); +		spin_unlock_bh(&iface_entry->tag_stat_list_lock); +	} +	spin_unlock_bh(&iface_stat_list_lock); +	atomic64_inc(&qtu_events.delete_cmds); +	res = 0; +err: +	return res; +} + +static int ctrl_cmd_counter_set(const char *input) +{ +	char cmd; +	uid_t uid = 0; +	tag_t tag; +	int res, argc; +	struct tag_counter_set *tcs; +	int counter_set; + +	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); +	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " +		 "set=%d uid=%u\n", input, argc, cmd, +		 counter_set, uid); +	if (argc != 3) { +		res = -EINVAL; +		goto err; +	} +	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { +		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", +			input); +		res = -EINVAL; +		goto err; +	} +	if (!can_manipulate_uids()) { +		pr_info("qtaguid: ctrl_counterset(%s): " +			"insufficient priv from pid=%u uid=%u\n", +			input, current->pid, current_fsuid()); +		res = -EPERM; +		goto err;  	} -	spin_unlock_irqrestore(&iface_stat_list_lock, flags); +	tag = make_tag_from_uid(uid); +	spin_lock_bh(&tag_counter_set_list_lock); +	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); +	if (!tcs) { +		tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); +		if (!tcs) { +			spin_unlock_bh(&tag_counter_set_list_lock); +			pr_err("qtaguid: ctrl_counterset(%s): " +			       "failed to alloc counter set\n", +			       input); +			res = -ENOMEM; +			goto err; +		} +		tcs->tn.tag = tag; +		tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); +		CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " +			 "(uid=%u) set=%d\n", +			 input, tag, get_uid_from_tag(tag), counter_set); +	} +	tcs->active_set = counter_set; +	spin_unlock_bh(&tag_counter_set_list_lock); +	atomic64_inc(&qtu_events.counter_set_changes);  	res = 0;  err: -	pr_debug("xt_qtaguid: ctrl_delete(%s) res=%d\n", input, res);  	return res;  } -  static int ctrl_cmd_tag(const char *input)  {  	char cmd; @@ -1139,113 +1574,157 @@ static int ctrl_cmd_tag(const char *input)  	uid_t uid = 0;  	tag_t acct_tag = 0;  	struct socket *el_socket; +	int refcnt = -1;  	int res, argc;  	struct sock_tag *sock_tag_entry; -	unsigned long flags;  	/* Unassigned args will get defaulted later. */  	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); -	pr_debug("xt_qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " +	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "  		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,  		 acct_tag, uid);  	if (argc < 2) {  		res = -EINVAL;  		goto err;  	} -	el_socket = sockfd_lookup(sock_fd, &res); +	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */  	if (!el_socket) { -		pr_info("xt_qtaguid: ctrl_tag(%s): failed to lookup" +		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"  			" sock_fd=%d err=%d\n", input, sock_fd, res);  		goto err;  	} +	refcnt = atomic_read(&el_socket->file->f_count); +	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%d\n", +		 input, refcnt);  	if (argc < 3) {  		acct_tag = 0;  	} else if (!valid_atag(acct_tag)) { -		pr_info("xt_qtaguid: ctrl_tag(%s): invalid tag\n", input); +		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);  		res = -EINVAL; -		goto err; +		goto err_put;  	} +	CT_DEBUG("qtaguid: ctrl_tag(%s): " +		 "uid=%u euid=%u fsuid=%u " +		 "in_group=%d in_egroup=%d\n", +		 input, current_uid(), current_euid(), current_fsuid(), +		 in_group_p(proc_stats_readall_gid), +		 in_egroup_p(proc_stats_readall_gid));  	if (argc < 4) {  		uid = current_fsuid();  	} else if (!can_impersonate_uid(uid)) { -		pr_info("xt_qtaguid: ctrl_tag(%s): insuficient priv\n", -			input); +		pr_info("qtaguid: ctrl_tag(%s): " +			"insufficient priv from pid=%u uid=%u\n", +			input, current->pid, current_fsuid());  		res = -EPERM; -		goto err; +		goto err_put;  	} -	spin_lock_irqsave(&sock_tag_list_lock, flags); +	spin_lock_bh(&sock_tag_list_lock);  	sock_tag_entry = get_sock_stat_nl(el_socket->sk);  	if (sock_tag_entry) { +		/* +		 * This is a re-tagging, so release the sock_fd that was +		 * locked at the time of the 1st tagging. +		 */ +		sockfd_put(sock_tag_entry->socket); +		refcnt--;  		sock_tag_entry->tag = combine_atag_with_uid(acct_tag,  							    uid);  	} else { -		spin_unlock_irqrestore(&sock_tag_list_lock, flags);  		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), -					 GFP_KERNEL); +					 GFP_ATOMIC);  		if (!sock_tag_entry) { +			pr_err("qtaguid: ctrl_tag(%s): " +			       "socket tag alloc failed\n", +			       input); +			spin_unlock_bh(&sock_tag_list_lock);  			res = -ENOMEM; -			goto err; +			goto err_put;  		}  		sock_tag_entry->sk = el_socket->sk; +		sock_tag_entry->socket = el_socket;  		sock_tag_entry->tag = combine_atag_with_uid(acct_tag,  							    uid); -		spin_lock_irqsave(&sock_tag_list_lock, flags);  		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); +		atomic64_inc(&qtu_events.sockets_tagged);  	} -	spin_unlock_irqrestore(&sock_tag_list_lock, flags); - -	pr_debug("xt_qtaguid: tag: sock_tag_entry->sk=%p " -		 "...->tag=0x%llx (uid=%u)\n", -		 sock_tag_entry->sk, sock_tag_entry->tag, -		 get_uid_from_tag(sock_tag_entry->tag)); -	res = 0; +	spin_unlock_bh(&sock_tag_list_lock); +	/* We keep the ref to the socket (file) until it is untagged */ +	CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n", +		 input, +		 el_socket ? atomic_read(&el_socket->file->f_count) : -1); +	return 0; +err_put: +	/* Release the sock_fd that was grabbed by sockfd_lookup(). */ +	sockfd_put(el_socket); +	refcnt--;  err: -	pr_debug("xt_qtaguid: ctrl_tag(%s) res=%d\n", input, res); +	CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n", +		 input, refcnt);  	return res;  } -  static int ctrl_cmd_untag(const char *input)  {  	char cmd;  	int sock_fd = 0;  	struct socket *el_socket; +	int refcnt = -1;  	int res, argc;  	struct sock_tag *sock_tag_entry; -	unsigned long flags; -	pr_debug("xt_qtaguid: ctrl_untag(%s): entered\n", input);  	argc = sscanf(input, "%c %d", &cmd, &sock_fd); -	pr_debug("xt_qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", +	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",  		 input, argc, cmd, sock_fd);  	if (argc < 2) {  		res = -EINVAL;  		goto err;  	} -	el_socket = sockfd_lookup(sock_fd, &res); +	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */  	if (!el_socket) { -		pr_info("xt_qtaguid: ctrl_untag(%s): failed to lookup" +		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"  			" sock_fd=%d err=%d\n", input, sock_fd, res);  		goto err;  	} -	spin_lock_irqsave(&sock_tag_list_lock, flags); +	refcnt = atomic_read(&el_socket->file->f_count); +	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%d\n", +		 input, refcnt); +	spin_lock_bh(&sock_tag_list_lock);  	sock_tag_entry = get_sock_stat_nl(el_socket->sk);  	if (!sock_tag_entry) { -		spin_unlock_irqrestore(&sock_tag_list_lock, flags); +		spin_unlock_bh(&sock_tag_list_lock);  		res = -EINVAL; -		goto err; +		goto err_put;  	} -	/* The socket already belongs to the current process -	 * so it can do whatever it wants to it. */ -	rb_erase(&sock_tag_entry->node, &sock_tag_tree); -	spin_unlock_irqrestore(&sock_tag_list_lock, flags); +	/* +	 * The socket already belongs to the current process +	 * so it can do whatever it wants to it. +	 */ +	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); + +	/* +	 * Release the sock_fd that was grabbed at tag time, +	 * and once more for the sockfd_lookup() here. +	 */ +	sockfd_put(sock_tag_entry->socket); +	spin_unlock_bh(&sock_tag_list_lock); +	sockfd_put(el_socket); +	refcnt -= 2;  	kfree(sock_tag_entry); +	atomic64_inc(&qtu_events.sockets_untagged); +	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n", +		 input, refcnt); -	res = 0; +	return 0; + +err_put: +	/* Release the sock_fd that was grabbed by sockfd_lookup(). */ +	sockfd_put(el_socket); +	refcnt--;  err: -	pr_debug("xt_qtaguid: ctrl_untag(%s): res=%d\n", input, res); +	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n", +		 input, refcnt);  	return res;  } @@ -1254,7 +1733,6 @@ static int qtaguid_ctrl_parse(const char *input, int count)  	char cmd;  	int res; -	pr_debug("xt_qtaguid: ctrl(%s): entered\n", input);  	cmd = input[0];  	/* Collect params for commands */  	switch (cmd) { @@ -1262,6 +1740,10 @@ static int qtaguid_ctrl_parse(const char *input, int count)  		res = ctrl_cmd_delete(input);  		break; +	case 's': +		res = ctrl_cmd_counter_set(input); +		break; +  	case 't':  		res = ctrl_cmd_tag(input);  		break; @@ -1277,7 +1759,7 @@ static int qtaguid_ctrl_parse(const char *input, int count)  	if (!res)  		res = count;  err: -	pr_debug("xt_qtaguid: ctrl(%s): res=%d\n", input, res); +	CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);  	return res;  } @@ -1300,14 +1782,22 @@ static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,  	return qtaguid_ctrl_parse(input_buf, count);  } -static int print_stats_line(char *outp, int char_count, int item_index, -			    char *ifname, tag_t tag, -			    struct data_counters *counters) +struct proc_print_info { +	char *outp; +	char **num_items_returned; +	struct iface_stat *iface_entry; +	struct tag_stat *ts_entry; +	int item_index; +	int char_count; +}; + +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)  {  	int len; -	if (!item_index) { -		len = snprintf(outp, char_count, -			       "idx iface acct_tag_hex uid_tag_int " +	struct data_counters *cnts; +	if (!ppi->item_index) { +		len = snprintf(ppi->outp, ppi->char_count, +			       "idx iface acct_tag_hex uid_tag_int cnt_set "  			       "rx_bytes rx_packets "  			       "tx_bytes tx_packets "  			       "rx_tcp_packets rx_tcp_bytes " @@ -1317,47 +1807,73 @@ static int print_stats_line(char *outp, int char_count, int item_index,  			       "tx_udp_packets tx_udp_bytes "  			       "tx_other_packets tx_other_bytes\n");  	} else { +		tag_t tag = ppi->ts_entry->tn.tag;  		uid_t stat_uid = get_uid_from_tag(tag);  		if (!can_read_other_uid_stats(stat_uid)) { -			pr_debug("xt_qtaguid: insufficient priv for stat line:" -				 "%s 0x%llx %u\n", -				 ifname, get_atag_from_tag(tag), stat_uid); +			CT_DEBUG("qtaguid: stats line: " +				 "%s 0x%llx %u: " +				 "insufficient priv from pid=%u uid=%u\n", +				 ppi->iface_entry->ifname, +				 get_atag_from_tag(tag), stat_uid, +				 current->pid, current_fsuid());  			return 0;  		} -		len = snprintf(outp, char_count, -			       "%d %s 0x%llx %u " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu " -			       "%llu %llu\n", -			       item_index, -			       ifname, -			       get_atag_from_tag(tag), -			       stat_uid, -			       dc_sum_bytes(counters, IFS_RX), -			       dc_sum_packets(counters, IFS_RX), -			       dc_sum_bytes(counters, IFS_TX), -			       dc_sum_packets(counters, IFS_TX), -			       counters->bpc[IFS_RX][IFS_TCP].bytes, -			       counters->bpc[IFS_RX][IFS_TCP].packets, -			       counters->bpc[IFS_RX][IFS_UDP].bytes, -			       counters->bpc[IFS_RX][IFS_UDP].packets, -			       counters->bpc[IFS_RX][IFS_PROTO_OTHER].bytes, -			       counters->bpc[IFS_RX][IFS_PROTO_OTHER].packets, -			       counters->bpc[IFS_TX][IFS_TCP].bytes, -			       counters->bpc[IFS_TX][IFS_TCP].packets, -			       counters->bpc[IFS_TX][IFS_UDP].bytes, -			       counters->bpc[IFS_TX][IFS_UDP].packets, -			       counters->bpc[IFS_TX][IFS_PROTO_OTHER].bytes, -			       counters->bpc[IFS_TX][IFS_PROTO_OTHER].packets); +		cnts = &ppi->ts_entry->counters; +		len = snprintf( +			ppi->outp, ppi->char_count, +			"%d %s 0x%llx %u %u " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu " +			"%llu %llu\n", +			ppi->item_index, +			ppi->iface_entry->ifname, +			get_atag_from_tag(tag), +			stat_uid, +			cnt_set, +			dc_sum_bytes(cnts, cnt_set, IFS_RX), +			dc_sum_packets(cnts, cnt_set, IFS_RX), +			dc_sum_bytes(cnts, cnt_set, IFS_TX), +			dc_sum_packets(cnts, cnt_set, IFS_TX), +			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, +			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, +			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, +			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, +			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, +			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);  	}  	return len;  } +bool pp_sets(struct proc_print_info *ppi) +{ +	int len; +	int counter_set; +	for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; +	     counter_set++) { +		len = pp_stats_line(ppi, counter_set); +		if (len >= ppi->char_count) { +			*ppi->outp = '\0'; +			return false; +		} +		if (len) { +			ppi->outp += len; +			ppi->char_count -= len; +			(*ppi->num_items_returned)++; +		} +	} +	return true; +}  /*   * Procfs reader to get all tag stats using style "1)" as described in @@ -1368,19 +1884,23 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,  				off_t items_to_skip, int char_count, int *eof,  				void *data)  { -	char *outp = page; +	struct proc_print_info ppi;  	int len; -	unsigned long flags, flags2; -	struct iface_stat *iface_entry; -	struct tag_stat *ts_entry; -	int item_index = 0; + +	ppi.outp = page; +	ppi.item_index = 0; +	ppi.char_count = char_count; +	ppi.num_items_returned = num_items_returned;  	if (unlikely(module_passive)) { +		len = pp_stats_line(&ppi, 0); +		/* The header should always be shorter than the buffer. */ +		WARN_ON(len >= ppi.char_count);  		*eof = 1; -		return 0; +		return len;  	} -	pr_debug("xt_qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " +	CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "  		"char_count=%d *eof=%d\n", page, *num_items_returned,  		items_to_skip, char_count, *eof); @@ -1389,53 +1909,39 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,  	if (!items_to_skip) {  		/* The idx is there to help debug when things go belly up. */ -		len = print_stats_line(outp, char_count, /*index*/0, NULL, -				       make_tag_from_uid(0), NULL); +		len = pp_stats_line(&ppi, 0);  		/* Don't advance the outp unless the whole line was printed */ -		if (len >= char_count) { -			*outp = '\0'; -			return outp - page; +		if (len >= ppi.char_count) { +			*ppi.outp = '\0'; +			return ppi.outp - page;  		} -		outp += len; -		char_count -= len; +		ppi.outp += len; +		ppi.char_count -= len;  	} -	spin_lock_irqsave(&iface_stat_list_lock, flags); -	list_for_each_entry(iface_entry, &iface_stat_list, list) { + +	spin_lock_bh(&iface_stat_list_lock); +	list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {  		struct rb_node *node; -		spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2); -		for (node = rb_first(&iface_entry->tag_stat_tree); +		spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); +		for (node = rb_first(&ppi.iface_entry->tag_stat_tree);  		     node;  		     node = rb_next(node)) { -			ts_entry = rb_entry(node, struct tag_stat, node); -			if (item_index++ < items_to_skip) +			ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); +			if (ppi.item_index++ < items_to_skip)  				continue; -			len = print_stats_line(outp, char_count, -					       item_index, -					       iface_entry->ifname, -					       ts_entry->tag, -					       &ts_entry->counters); -			if (len >= char_count) { -				*outp = '\0'; -				spin_unlock_irqrestore( -					&iface_entry->tag_stat_list_lock, -					flags2); -				spin_unlock_irqrestore( -					&iface_stat_list_lock, flags); -				return outp - page; -			} -			if (len) { -				outp += len; -				char_count -= len; -				(*num_items_returned)++; +			if (!pp_sets(&ppi)) { +				spin_unlock_bh( +					&ppi.iface_entry->tag_stat_list_lock); +				spin_unlock_bh(&iface_stat_list_lock); +				return ppi.outp - page;  			}  		} -		spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, -				flags2); +		spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);  	} -	spin_unlock_irqrestore(&iface_stat_list_lock, flags); +	spin_unlock_bh(&iface_stat_list_lock);  	*eof = 1; -	return outp - page; +	return ppi.outp - page;  }  /*------------------------------------------*/ @@ -1444,7 +1950,7 @@ static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)  	int ret;  	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);  	if (!*res_procdir) { -		pr_err("xt_qtaguid: failed to create proc/.../xt_qtaguid\n"); +		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");  		ret = -ENOMEM;  		goto no_dir;  	} @@ -1452,7 +1958,7 @@ static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)  	xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,  						*res_procdir);  	if (!xt_qtaguid_ctrl_file) { -		pr_err("xt_qtaguid: failed to create xt_qtaguid/ctrl " +		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "  			" file\n");  		ret = -ENOMEM;  		goto no_ctrl_entry; @@ -1463,7 +1969,7 @@ static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)  	xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,  						*res_procdir);  	if (!xt_qtaguid_stats_file) { -		pr_err("xt_qtaguid: failed to create xt_qtaguid/stats " +		pr_err("qtaguid: failed to create xt_qtaguid/stats "  			"file\n");  		ret = -ENOMEM;  		goto no_stats_entry; @@ -1505,7 +2011,8 @@ static int __init qtaguid_mt_init(void)  	return 0;  } -/* TODO: allow unloading of the module. +/* + * TODO: allow unloading of the module.   * For now stats are permanent.   * Kconfig forces'y/n' and never an 'm'.   */ | 
