From dc8a82ad285dcd2831feb2fd8f7b41ce1f82e243 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:30:40 -0700 Subject: [IPV6]: Fix memory leak in cleanup_ipv6_mibs() The icmpv6msg mib statistics is not freed. This is almost not critical for current kernel, since ipv6 module is unloadable, but this can happen on load error and will happen every time we stop the network namespace (when we have one, of course). Signed-off-by: Pavel Emelyanov Acked-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bc929381fa4..1b1caf3aa1c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -747,6 +747,7 @@ static void cleanup_ipv6_mibs(void) { snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); snmp_mib_free((void **)udplite_stats_in6); } -- cgit v1.2.3 From fd9e63544cac30a34c951f0ec958038f0529e244 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:43:37 -0700 Subject: [INET]: Omit double hash calculations in xxx_frag_intern Since the hash value is already calculated in xxx_find, we can simply use it later. This is already done in netfilter code, so make the same in ipv4 and ipv6. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 11 ++++------- net/ipv6/reassembly.c | 11 +++++------ 2 files changed, 9 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 443b3f89192..d12a18b8f56 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -212,17 +212,14 @@ out: /* Creation primitives. */ -static struct ipq *ip_frag_intern(struct ipq *qp_in) +static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash) { struct ipq *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif - unsigned int hash; write_lock(&ip4_frags.lock); - hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, - qp_in->protocol); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -257,7 +254,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) +static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) { struct ipq *qp; @@ -278,7 +275,7 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) spin_lock_init(&qp->q.lock); atomic_set(&qp->q.refcnt, 1); - return ip_frag_intern(qp); + return ip_frag_intern(qp, h); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); @@ -313,7 +310,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) } read_unlock(&ip4_frags.lock); - return ip_frag_create(iph, user); + return ip_frag_create(iph, user, hash); } /* Is the fragment too far ahead to be part of ipq? */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6ad19cfc202..0a1bf43bd48 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -233,16 +233,15 @@ out: /* Creation primitives. */ -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) +static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, + unsigned int hash) { struct frag_queue *fq; - unsigned int hash; #ifdef CONFIG_SMP struct hlist_node *n; #endif write_lock(&ip6_frags.lock); - hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); #ifdef CONFIG_SMP hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { if (fq->id == fq_in->id && @@ -273,7 +272,7 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) static struct frag_queue * ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) + struct inet6_dev *idev, unsigned int hash) { struct frag_queue *fq; @@ -290,7 +289,7 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, spin_lock_init(&fq->q.lock); atomic_set(&fq->q.refcnt, 1); - return ip6_frag_intern(fq); + return ip6_frag_intern(fq, hash); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); @@ -318,7 +317,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, } read_unlock(&ip6_frags.lock); - return ip6_frag_create(id, src, dst, idev); + return ip6_frag_create(id, src, dst, idev, hash); } -- cgit v1.2.3 From 2588fe1d782f1686847493ad643157d5d10bf602 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:44:34 -0700 Subject: [INET]: Consolidate xxx_frag_intern This routine checks for the existence of a given entry in the hash table and inserts the new one if needed. The ->equal callback is used to compare two frag_queue-s together, but this one is temporary and will be removed later. The netfilter code and the ipv6 one use the same routine to compare frags. The inet_frag_intern() always returns non-NULL pointer, so convert the inet_frag_queue into protocol specific one (with the container_of) without any checks. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 4 +++ include/net/ipv6.h | 3 ++ net/ipv4/inet_fragment.c | 37 ++++++++++++++++++++++ net/ipv4/ip_fragment.c | 54 +++++++++++---------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 34 +++------------------ net/ipv6/reassembly.c | 46 ++++++++++------------------ 6 files changed, 82 insertions(+), 96 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 911c2cd0294..133e187fbc9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -41,6 +41,8 @@ struct inet_frags { unsigned int (*hashfn)(struct inet_frag_queue *); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); + int (*equal)(struct inet_frag_queue *q1, + struct inet_frag_queue *q2); }; void inet_frags_init(struct inet_frags *); @@ -50,6 +52,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); +struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, + struct inet_frags *f, unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cc796cbc1b2..ff126971346 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -377,6 +377,9 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, prefixlen); } +struct inet_frag_queue; +int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + static inline int ipv6_addr_any(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 484cf512858..15054eb3d4b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -172,3 +172,40 @@ int inet_frag_evictor(struct inet_frags *f) return evicted; } EXPORT_SYMBOL(inet_frag_evictor); + +struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, + struct inet_frags *f, unsigned int hash) +{ + struct inet_frag_queue *qp; +#ifdef CONFIG_SMP + struct hlist_node *n; +#endif + + write_lock(&f->lock); +#ifdef CONFIG_SMP + /* With SMP race we have to recheck hash table, because + * such entry could be created on other cpu, while we + * promoted read lock to write lock. + */ + hlist_for_each_entry(qp, n, &f->hash[hash], list) { + if (f->equal(qp, qp_in)) { + atomic_inc(&qp->refcnt); + write_unlock(&f->lock); + qp_in->last_in |= COMPLETE; + inet_frag_put(qp_in, f); + return qp; + } + } +#endif + qp = qp_in; + if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) + atomic_inc(&qp->refcnt); + + atomic_inc(&qp->refcnt); + hlist_add_head(&qp->list, &f->hash[hash]); + list_add_tail(&qp->lru_list, &f->lru_list); + f->nqueues++; + write_unlock(&f->lock); + return qp; +} +EXPORT_SYMBOL(inet_frag_intern); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d12a18b8f56..4b1bbbee22c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -123,6 +123,20 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q) return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } +static int ip4_frag_equal(struct inet_frag_queue *q1, + struct inet_frag_queue *q2) +{ + struct ipq *qp1, *qp2; + + qp1 = container_of(q1, struct ipq, q); + qp2 = container_of(q2, struct ipq, q); + return (qp1->id == qp2->id && + qp1->saddr == qp2->saddr && + qp1->daddr == qp2->daddr && + qp1->protocol == qp2->protocol && + qp1->user == qp2->user); +} + /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -214,43 +228,10 @@ out: static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash) { - struct ipq *qp; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif + struct inet_frag_queue *q; - write_lock(&ip4_frags.lock); -#ifdef CONFIG_SMP - /* With SMP race we have to recheck hash table, because - * such entry could be created on other cpu, while we - * promoted read lock to write lock. - */ - hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { - if (qp->id == qp_in->id && - qp->saddr == qp_in->saddr && - qp->daddr == qp_in->daddr && - qp->protocol == qp_in->protocol && - qp->user == qp_in->user) { - atomic_inc(&qp->q.refcnt); - write_unlock(&ip4_frags.lock); - qp_in->q.last_in |= COMPLETE; - ipq_put(qp_in); - return qp; - } - } -#endif - qp = qp_in; - - if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) - atomic_inc(&qp->q.refcnt); - - atomic_inc(&qp->q.refcnt); - hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]); - INIT_LIST_HEAD(&qp->q.lru_list); - list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list); - ip4_frags.nqueues++; - write_unlock(&ip4_frags.lock); - return qp; + q = inet_frag_intern(&qp_in->q, &ip4_frags, hash); + return container_of(q, struct ipq, q); } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ @@ -671,6 +652,7 @@ void __init ipfrag_init(void) ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); + ip4_frags.equal = ip4_frag_equal; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 726fafd4196..d7dc444ec48 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -187,37 +187,10 @@ out: static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, struct nf_ct_frag6_queue *fq_in) { - struct nf_ct_frag6_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&nf_frags.lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&nf_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); - return fq; - } - } -#endif - fq = fq_in; + struct inet_frag_queue *q; - if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); - - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &nf_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &nf_frags.lru_list); - nf_frags.nqueues++; - write_unlock(&nf_frags.lock); - return fq; + q = inet_frag_intern(&fq_in->q, &nf_frags, hash); + return container_of(q, struct nf_ct_frag6_queue, q); } @@ -752,6 +725,7 @@ int nf_ct_frag6_init(void) nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.equal = ip6_frag_equal; inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0a1bf43bd48..73ea204eaa6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -143,6 +143,18 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } +int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) +{ + struct frag_queue *fq1, *fq2; + + fq1 = container_of(q1, struct frag_queue, q); + fq2 = container_of(q2, struct frag_queue, q); + return (fq1->id == fq2->id && + ipv6_addr_equal(&fq2->saddr, &fq1->saddr) && + ipv6_addr_equal(&fq2->daddr, &fq1->daddr)); +} +EXPORT_SYMBOL(ip6_frag_equal); + /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -236,37 +248,10 @@ out: static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, unsigned int hash) { - struct frag_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&ip6_frags.lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&ip6_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); + struct inet_frag_queue *q; - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list); - ip6_frags.nqueues++; - write_unlock(&ip6_frags.lock); - return fq; + q = inet_frag_intern(&fq_in->q, &ip6_frags, hash); + return container_of(q, struct frag_queue, q); } @@ -699,5 +684,6 @@ void __init ipv6_frag_init(void) ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.equal = ip6_frag_equal; inet_frags_init(&ip6_frags); } -- cgit v1.2.3 From e521db9d790aaa60ae8920e21cb7faedc280fc36 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:45:23 -0700 Subject: [INET]: Consolidate xxx_frag_alloc() Just perform the kzalloc() allocation and setup common fields in the inet_frag_queue(). Then return the result to the caller to initialize the rest. The inet_frag_alloc() may return NULL, so check the return value before doing the container_of(). This looks ugly, but the xxx_frag_alloc() will be removed soon. The xxx_expire() timer callbacks are patches, because the argument is now the inet_frag_queue, not the protocol specific queue. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ net/ipv4/inet_fragment.c | 17 +++++++++++++++++ net/ipv4/ip_fragment.c | 20 +++++++------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++----------- net/ipv6/reassembly.c | 19 +++++++------------ 5 files changed, 41 insertions(+), 36 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 133e187fbc9..412b8582a61 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,6 +43,7 @@ struct inet_frags { void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + void (*frag_expire)(unsigned long data); }; void inet_frags_init(struct inet_frags *); @@ -54,6 +55,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, int inet_frag_evictor(struct inet_frags *f); struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, struct inet_frags *f, unsigned int hash); +struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 15054eb3d4b..57e15fa307d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -209,3 +209,20 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, return qp; } EXPORT_SYMBOL(inet_frag_intern); + +struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) +{ + struct inet_frag_queue *q; + + q = kzalloc(f->qsize, GFP_ATOMIC); + if (q == NULL) + return NULL; + + atomic_add(f->qsize, &f->mem); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + spin_lock_init(&q->lock); + atomic_set(&q->refcnt, 1); + + return q; +} +EXPORT_SYMBOL(inet_frag_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4b1bbbee22c..fc0d530df52 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -158,12 +158,10 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q) static __inline__ struct ipq *frag_alloc_queue(void) { - struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC); + struct inet_frag_queue *q; - if (!qp) - return NULL; - atomic_add(sizeof(struct ipq), &ip4_frags.mem); - return qp; + q = inet_frag_alloc(&ip4_frags); + return q ? container_of(q, struct ipq, q) : NULL; } @@ -199,7 +197,9 @@ static void ip_evictor(void) */ static void ip_expire(unsigned long arg) { - struct ipq *qp = (struct ipq *) arg; + struct ipq *qp; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); spin_lock(&qp->q.lock); @@ -249,13 +249,6 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) qp->user = user; qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; - /* Initialize a timer for this entry. */ - init_timer(&qp->q.timer); - qp->q.timer.data = (unsigned long) qp; /* pointer to queue */ - qp->q.timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->q.lock); - atomic_set(&qp->q.refcnt, 1); - return ip_frag_intern(qp, h); out_nomem: @@ -653,6 +646,7 @@ void __init ipfrag_init(void) ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.equal = ip4_frag_equal; + ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d7dc444ec48..3f8c16b3301 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -137,13 +137,10 @@ static void nf_frag_free(struct inet_frag_queue *q) static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) { - struct nf_ct_frag6_queue *fq; + struct inet_frag_queue *q; - fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - if (fq == NULL) - return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem); - return fq; + q = inet_frag_alloc(&nf_frags); + return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; } /* Destruction primitives. */ @@ -168,7 +165,10 @@ static void nf_ct_frag6_evictor(void) static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + struct nf_ct_frag6_queue *fq; + + fq = container_of((struct inet_frag_queue *)data, + struct nf_ct_frag6_queue, q); spin_lock(&fq->q.lock); @@ -208,10 +208,6 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return nf_ct_frag6_intern(hash, fq); oom: @@ -726,6 +722,7 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.equal = ip6_frag_equal; + nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 73ea204eaa6..21913c78f05 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -171,12 +171,10 @@ static void ip6_frag_free(struct inet_frag_queue *fq) static inline struct frag_queue *frag_alloc_queue(void) { - struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC); + struct inet_frag_queue *q; - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frags.mem); - return fq; + q = inet_frag_alloc(&ip6_frags); + return q ? container_of(q, struct frag_queue, q) : NULL; } /* Destruction primitives. */ @@ -205,9 +203,11 @@ static void ip6_evictor(struct inet6_dev *idev) static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq = (struct frag_queue *) data; + struct frag_queue *fq; struct net_device *dev = NULL; + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + spin_lock(&fq->q.lock); if (fq->q.last_in & COMPLETE) @@ -268,12 +268,6 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - init_timer(&fq->q.timer); - fq->q.timer.function = ip6_frag_expire; - fq->q.timer.data = (long) fq; - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return ip6_frag_intern(fq, hash); oom: @@ -685,5 +679,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.equal = ip6_frag_equal; + ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.3 From c6fda282294da882f8d8cc4c513940277dd380f5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:46:47 -0700 Subject: [INET]: Consolidate xxx_frag_create() This one uses the xxx_frag_intern() and xxx_frag_alloc() routines, which are already consolidated, so remove them from protocol code (as promised). The ->constructor callback is used to init the rest of the frag queue and it is the same for netfilter and ipv6. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +++-- include/net/ipv6.h | 8 +++++ net/ipv4/inet_fragment.c | 20 +++++++++--- net/ipv4/ip_fragment.c | 54 +++++++++++++++++---------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 39 ++++++++---------------- net/ipv6/reassembly.c | 45 ++++++++++++--------------- 6 files changed, 88 insertions(+), 85 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 412b8582a61..e33072b9fd9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -39,6 +39,8 @@ struct inet_frags { struct inet_frags_ctl *ctl; unsigned int (*hashfn)(struct inet_frag_queue *); + void (*constructor)(struct inet_frag_queue *q, + void *arg); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, @@ -53,9 +55,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); -struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *q, - struct inet_frags *f, unsigned int hash); -struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f); +struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *create_arg, unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ff126971346..9dc99bf5cf0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -380,6 +380,14 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, struct inet_frag_queue; int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); +struct ip6_create_arg { + __be32 id; + struct in6_addr *src; + struct in6_addr *dst; +}; + +void ip6_frag_init(struct inet_frag_queue *q, void *a); + static inline int ipv6_addr_any(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 57e15fa307d..b531f803cda 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -173,7 +173,7 @@ int inet_frag_evictor(struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_evictor); -struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, +static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, struct inet_frags *f, unsigned int hash) { struct inet_frag_queue *qp; @@ -208,9 +208,8 @@ struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, write_unlock(&f->lock); return qp; } -EXPORT_SYMBOL(inet_frag_intern); -struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) +static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -218,6 +217,7 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) if (q == NULL) return NULL; + f->constructor(q, arg); atomic_add(f->qsize, &f->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); @@ -225,4 +225,16 @@ struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f) return q; } -EXPORT_SYMBOL(inet_frag_alloc); + +struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, + unsigned int hash) +{ + struct inet_frag_queue *q; + + q = inet_frag_alloc(f, arg); + if (q == NULL) + return NULL; + + return inet_frag_intern(q, f, hash); +} +EXPORT_SYMBOL(inet_frag_create); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fc0d530df52..0d6cff1de5a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -108,6 +108,11 @@ int ip_frag_mem(void) static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev); +struct ip4_create_arg { + struct iphdr *iph; + u32 user; +}; + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, @@ -146,6 +151,20 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) kfree_skb(skb); } +static void ip4_frag_init(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp = container_of(q, struct ipq, q); + struct ip4_create_arg *arg = a; + + qp->protocol = arg->iph->protocol; + qp->id = arg->iph->id; + qp->saddr = arg->iph->saddr; + qp->daddr = arg->iph->daddr; + qp->user = arg->user; + qp->peer = sysctl_ipfrag_max_dist ? + inet_getpeer(arg->iph->saddr, 1) : NULL; +} + static __inline__ void ip4_frag_free(struct inet_frag_queue *q) { struct ipq *qp; @@ -156,14 +175,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q) kfree(qp); } -static __inline__ struct ipq *frag_alloc_queue(void) -{ - struct inet_frag_queue *q; - - q = inet_frag_alloc(&ip4_frags); - return q ? container_of(q, struct ipq, q) : NULL; -} - /* Destruction primitives. */ @@ -226,30 +237,20 @@ out: /* Creation primitives. */ -static struct ipq *ip_frag_intern(struct ipq *qp_in, unsigned int hash) -{ - struct inet_frag_queue *q; - - q = inet_frag_intern(&qp_in->q, &ip4_frags, hash); - return container_of(q, struct ipq, q); -} - /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) { - struct ipq *qp; + struct inet_frag_queue *q; + struct ip4_create_arg arg; - if ((qp = frag_alloc_queue()) == NULL) - goto out_nomem; + arg.iph = iph; + arg.user = user; - qp->protocol = iph->protocol; - qp->id = iph->id; - qp->saddr = iph->saddr; - qp->daddr = iph->daddr; - qp->user = user; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; + q = inet_frag_create(&ip4_frags, &arg, h); + if (q == NULL) + goto out_nomem; - return ip_frag_intern(qp, h); + return container_of(q, struct ipq, q); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); @@ -642,6 +643,7 @@ void __init ipfrag_init(void) { ip4_frags.ctl = &ip4_frags_ctl; ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3f8c16b3301..127d1d84278 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -135,14 +135,6 @@ static void nf_frag_free(struct inet_frag_queue *q) kfree(container_of(q, struct nf_ct_frag6_queue, q)); } -static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) -{ - struct inet_frag_queue *q; - - q = inet_frag_alloc(&nf_frags); - return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -184,33 +176,25 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, - struct nf_ct_frag6_queue *fq_in) +static struct nf_ct_frag6_queue * +nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; + struct ip6_create_arg arg; - q = inet_frag_intern(&fq_in->q, &nf_frags, hash); - return container_of(q, struct nf_ct_frag6_queue, q); -} - - -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; + arg.id = id; + arg.src = src; + arg.dst = dst; - if ((fq = frag_alloc_queue()) == NULL) { - pr_debug("Can't alloc new queue\n"); + q = inet_frag_create(&nf_frags, &arg, hash); + if (q == NULL) goto oom; - } - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - return nf_ct_frag6_intern(hash, fq); + return container_of(q, struct nf_ct_frag6_queue, q); oom: + pr_debug("Can't alloc new queue\n"); return NULL; } @@ -718,6 +702,7 @@ int nf_ct_frag6_init(void) { nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 21913c78f05..ce8734028d9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -164,17 +164,20 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) kfree_skb(skb); } -static void ip6_frag_free(struct inet_frag_queue *fq) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - kfree(container_of(fq, struct frag_queue, q)); + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; + + fq->id = arg->id; + ipv6_addr_copy(&fq->saddr, arg->src); + ipv6_addr_copy(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_init); -static inline struct frag_queue *frag_alloc_queue(void) +static void ip6_frag_free(struct inet_frag_queue *fq) { - struct inet_frag_queue *q; - - q = inet_frag_alloc(&ip6_frags); - return q ? container_of(q, struct frag_queue, q) : NULL; + kfree(container_of(fq, struct frag_queue, q)); } /* Destruction primitives. */ @@ -244,31 +247,22 @@ out: /* Creation primitives. */ - -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, - unsigned int hash) -{ - struct inet_frag_queue *q; - - q = inet_frag_intern(&fq_in->q, &ip6_frags, hash); - return container_of(q, struct frag_queue, q); -} - - static struct frag_queue * ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev, unsigned int hash) { - struct frag_queue *fq; + struct inet_frag_queue *q; + struct ip6_create_arg arg; - if ((fq = frag_alloc_queue()) == NULL) - goto oom; + arg.id = id; + arg.src = src; + arg.dst = dst; - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); + q = inet_frag_create(&ip6_frags, &arg, hash); + if (q == NULL) + goto oom; - return ip6_frag_intern(fq, hash); + return container_of(q, struct frag_queue, q); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); @@ -675,6 +669,7 @@ void __init ipv6_frag_init(void) ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); -- cgit v1.2.3 From abd6523d15f40bfee14652619a31a7f65f77f581 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:21 -0700 Subject: [INET]: Consolidate xxx_find() in fragment management Here we need another callback ->match to check whether the entry found in hash matches the key passed. The key used is the same as the creation argument for inet_frag_create. Yet again, this ->match is the same for netfilter and ipv6. Running a frew steps forward - this callback will later replace the ->equal one. Since the inet_frag_find() uses the already consolidated inet_frag_create() remove the xxx_frag_create from protocol codes. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 6 ++-- include/net/ipv6.h | 1 + net/ipv4/inet_fragment.c | 25 +++++++++++++-- net/ipv4/ip_fragment.c | 57 ++++++++++++--------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++++-------------- net/ipv6/reassembly.c | 50 +++++++++++------------------ 6 files changed, 73 insertions(+), 98 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e33072b9fd9..64299266a86 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -45,6 +45,8 @@ struct inet_frags { void (*skb_free)(struct sk_buff *); int (*equal)(struct inet_frag_queue *q1, struct inet_frag_queue *q2); + int (*match)(struct inet_frag_queue *q, + void *arg); void (*frag_expire)(unsigned long data); }; @@ -55,8 +57,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); int inet_frag_evictor(struct inet_frags *f); -struct inet_frag_queue *inet_frag_create(struct inet_frags *f, - void *create_arg, unsigned int hash); +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9dc99bf5cf0..005853a33ef 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -387,6 +387,7 @@ struct ip6_create_arg { }; void ip6_frag_init(struct inet_frag_queue *q, void *a); +int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b531f803cda..6ba98ebbed9 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -226,8 +226,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) return q; } -struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, - unsigned int hash) +static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *arg, unsigned int hash) { struct inet_frag_queue *q; @@ -237,4 +237,23 @@ struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, return inet_frag_intern(q, f, hash); } -EXPORT_SYMBOL(inet_frag_create); + +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash) +{ + struct inet_frag_queue *q; + struct hlist_node *n; + + read_lock(&f->lock); + hlist_for_each_entry(q, n, &f->hash[hash], list) { + if (f->match(q, key)) { + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + return q; + } + } + read_unlock(&f->lock); + + return inet_frag_create(f, key, hash); +} +EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0d6cff1de5a..928259dbc0f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -142,6 +142,19 @@ static int ip4_frag_equal(struct inet_frag_queue *q1, qp1->user == qp2->user); } +static int ip4_frag_match(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp; + struct ip4_create_arg *arg = a; + + qp = container_of(q, struct ipq, q); + return (qp->id == arg->iph->id && + qp->saddr == arg->iph->saddr && + qp->daddr == arg->iph->daddr && + qp->protocol == arg->iph->protocol && + qp->user == arg->user); +} + /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -235,18 +248,20 @@ out: ipq_put(qp); } -/* Creation primitives. */ - -/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user, unsigned int h) +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { struct inet_frag_queue *q; struct ip4_create_arg arg; + unsigned int hash; arg.iph = iph; arg.user = user; + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - q = inet_frag_create(&ip4_frags, &arg, h); + q = inet_frag_find(&ip4_frags, &arg, hash); if (q == NULL) goto out_nomem; @@ -257,37 +272,6 @@ out_nomem: return NULL; } -/* Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and create new one, if nothing is found. - */ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) -{ - __be16 id = iph->id; - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; - __u8 protocol = iph->protocol; - unsigned int hash; - struct ipq *qp; - struct hlist_node *n; - - read_lock(&ip4_frags.lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { - if (qp->id == id && - qp->saddr == saddr && - qp->daddr == daddr && - qp->protocol == protocol && - qp->user == user) { - atomic_inc(&qp->q.refcnt); - read_unlock(&ip4_frags.lock); - return qp; - } - } - read_unlock(&ip4_frags.lock); - - return ip_frag_create(iph, user, hash); -} - /* Is the fragment too far ahead to be part of ipq? */ static inline int ip_frag_too_far(struct ipq *qp) { @@ -648,6 +632,7 @@ void __init ipfrag_init(void) ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.equal = ip4_frag_equal; + ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 127d1d84278..bff63d79c64 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -176,18 +176,19 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, - struct in6_addr *dst) +static __inline__ struct nf_ct_frag6_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_frags, &arg, hash); if (q == NULL) goto oom; @@ -198,28 +199,6 @@ oom: return NULL; } -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&nf_frags.lock); - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&nf_frags.lock); - return fq; - } - } - read_unlock(&nf_frags.lock); - - return nf_ct_frag6_create(hash, id, src, dst); -} - static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) @@ -706,6 +685,7 @@ int nf_ct_frag6_init(void) nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.match = ip6_frag_match; nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ce8734028d9..11fffe791fc 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -155,6 +155,18 @@ int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) } EXPORT_SYMBOL(ip6_frag_equal); +int ip6_frag_match(struct inet_frag_queue *q, void *a) +{ + struct frag_queue *fq; + struct ip6_create_arg *arg = a; + + fq = container_of(q, struct frag_queue, q); + return (fq->id == arg->id && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst)); +} +EXPORT_SYMBOL(ip6_frag_match); + /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -245,20 +257,20 @@ out: fq_put(fq); } -/* Creation primitives. */ - -static struct frag_queue * -ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev, unsigned int hash) +static __inline__ struct frag_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&ip6_frags, &arg, hash); + q = inet_frag_find(&ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -269,31 +281,6 @@ oom: return NULL; } -static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash; - - read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&ip6_frags.lock); - return fq; - } - } - read_unlock(&ip6_frags.lock); - - return ip6_frag_create(id, src, dst, idev, hash); -} - - static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { @@ -673,6 +660,7 @@ void __init ipv6_frag_init(void) ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); -- cgit v1.2.3 From 48d60056387c37a17a46feda48613587a90535e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:56 -0700 Subject: [INET]: Remove no longer needed ->equal callback Since this callback is used to check for conflicts in hashtable when inserting a newly created frag queue, we can do the same by checking for matching the queue with the argument, used to create one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 -- include/net/ipv6.h | 1 - net/ipv4/inet_fragment.c | 6 +++--- net/ipv4/ip_fragment.c | 15 --------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 13 ------------- 6 files changed, 3 insertions(+), 35 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 64299266a86..954def40897 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,8 +43,6 @@ struct inet_frags { void *arg); void (*destructor)(struct inet_frag_queue *); void (*skb_free)(struct sk_buff *); - int (*equal)(struct inet_frag_queue *q1, - struct inet_frag_queue *q2); int (*match)(struct inet_frag_queue *q, void *arg); void (*frag_expire)(unsigned long data); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 005853a33ef..ae328b680ff 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -378,7 +378,6 @@ static inline int ipv6_prefix_equal(const struct in6_addr *a1, } struct inet_frag_queue; -int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2); struct ip6_create_arg { __be32 id; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6ba98ebbed9..3ed09dd9344 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -174,7 +174,7 @@ int inet_frag_evictor(struct inet_frags *f) EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, - struct inet_frags *f, unsigned int hash) + struct inet_frags *f, unsigned int hash, void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP @@ -188,7 +188,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, * promoted read lock to write lock. */ hlist_for_each_entry(qp, n, &f->hash[hash], list) { - if (f->equal(qp, qp_in)) { + if (f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); qp_in->last_in |= COMPLETE; @@ -235,7 +235,7 @@ static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, if (q == NULL) return NULL; - return inet_frag_intern(q, f, hash); + return inet_frag_intern(q, f, hash, arg); } struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 928259dbc0f..314593b2050 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -128,20 +128,6 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q) return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -static int ip4_frag_equal(struct inet_frag_queue *q1, - struct inet_frag_queue *q2) -{ - struct ipq *qp1, *qp2; - - qp1 = container_of(q1, struct ipq, q); - qp2 = container_of(q2, struct ipq, q); - return (qp1->id == qp2->id && - qp1->saddr == qp2->saddr && - qp1->daddr == qp2->daddr && - qp1->protocol == qp2->protocol && - qp1->user == qp2->user); -} - static int ip4_frag_match(struct inet_frag_queue *q, void *a) { struct ipq *qp; @@ -631,7 +617,6 @@ void __init ipfrag_init(void) ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); - ip4_frags.equal = ip4_frag_equal; ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; inet_frags_init(&ip4_frags); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bff63d79c64..25746d31504 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -686,7 +686,6 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; - nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 11fffe791fc..01766bc75b6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -143,18 +143,6 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } -int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) -{ - struct frag_queue *fq1, *fq2; - - fq1 = container_of(q1, struct frag_queue, q); - fq2 = container_of(q2, struct frag_queue, q); - return (fq1->id == fq2->id && - ipv6_addr_equal(&fq2->saddr, &fq1->saddr) && - ipv6_addr_equal(&fq2->daddr, &fq1->daddr)); -} -EXPORT_SYMBOL(ip6_frag_equal); - int ip6_frag_match(struct inet_frag_queue *q, void *a) { struct frag_queue *fq; @@ -661,7 +649,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; - ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.3 From c95477090a2ace6d241c184adc3fbfcab9c61ceb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:48:26 -0700 Subject: [INET]: Consolidate frag queues freeing Since we now allocate the queues in inet_fragment.c, we can safely free it in the same place. The ->destructor callback thus becomes optional for inet_frags. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 4 +++- net/ipv4/ip_fragment.c | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 7 +------ net/ipv6/reassembly.c | 7 +------ 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 3ed09dd9344..e15e04fc666 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -136,7 +136,9 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, *work -= f->qsize; atomic_sub(f->qsize, &f->mem); - f->destructor(q); + if (f->destructor) + f->destructor(q); + kfree(q); } EXPORT_SYMBOL(inet_frag_destroy); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 314593b2050..453ae041edd 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -171,7 +171,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q) qp = container_of(q, struct ipq, q); if (qp->peer) inet_putpeer(qp->peer); - kfree(qp); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 25746d31504..e170c67c47a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -130,11 +130,6 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) kfree_skb(skb); } -static void nf_frag_free(struct inet_frag_queue *q) -{ - kfree(container_of(q, struct nf_ct_frag6_queue, q)); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -682,7 +677,7 @@ int nf_ct_frag6_init(void) nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; - nf_frags.destructor = nf_frag_free; + nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 01766bc75b6..76c88a93b9b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -175,11 +175,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -static void ip6_frag_free(struct inet_frag_queue *fq) -{ - kfree(container_of(fq, struct frag_queue, q)); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct frag_queue *fq) @@ -645,7 +640,7 @@ void __init ipv6_frag_init(void) ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; - ip6_frags.destructor = ip6_frag_free; + ip6_frags.destructor = NULL; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; -- cgit v1.2.3 From 16910b9829797cda4032fbc84e5292ac7b4474f7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:23:43 -0700 Subject: [IPV6]: Fix return type for snmp6_free_dev() This call is essentially void. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 52d10d21321..edf06ca3474 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -283,12 +283,11 @@ err_ip: return err; } -static int snmp6_free_dev(struct inet6_dev *idev) +static void snmp6_free_dev(struct inet6_dev *idev) { snmp_mib_free((void **)idev->stats.icmpv6msg); snmp_mib_free((void **)idev->stats.icmpv6); snmp_mib_free((void **)idev->stats.ipv6); - return 0; } /* Nobody refers to this device, we may destroy it. */ -- cgit v1.2.3 From aaf70ec7fde2321281b2a49c7c9f881c90d0d208 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:25:32 -0700 Subject: [IPV6]: Cleanup snmp6_alloc_dev() This functions is never called with NULL or not setup argument, so the checks inside are redundant. Also, the return value is always -ENOMEM, so no need in additional variable for this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index edf06ca3474..348bd8d0611 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -255,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - int err = -ENOMEM; - - if (!idev || !idev->dev) - return -EINVAL; - if (snmp_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) @@ -280,7 +275,7 @@ err_icmpmsg: err_icmp: snmp_mib_free((void **)idev->stats.ipv6); err_ip: - return err; + return -ENOMEM; } static void snmp6_free_dev(struct inet6_dev *idev) -- cgit v1.2.3 From 04663d0b8b3c8ce3804106279420cfe5bdfcce3c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:28:06 -0700 Subject: [IPSEC]: Fix pure tunnel modes involving IPv6 I noticed that my recent patch broke 6-on-4 pure IPsec tunnels (the ones that are only used for incompressible IPsec packets). Subsequent reviews show that I broke 6-on-6 pure tunnels more than three years ago and nobody ever noticed. I suppose every must be testing 6-on-6 IPComp with large pings which are very compressible :) This patch fixes both cases. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 1312417608e..83e9580feac 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -18,7 +18,7 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) { - return IPPROTO_IP; + return ip_hdr(skb)->protocol; } static int ipip_init_state(struct xfrm_state *x) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 3f8a3abde67..6c67ac197ee 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -248,7 +248,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return skb_network_header(skb)[IP6CB(skb)->nhoff]; } static int xfrm6_tunnel_rcv(struct sk_buff *skb) -- cgit v1.2.3 From 33b5ecb8f64706d1ed472dcb44162ab3a7345724 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:29:25 -0700 Subject: [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi Currently xfrm6_rcv_spi gets the nexthdr value itself from the packet. This means that we need to fix up the value in case we have a 4-on-6 tunnel. Moving this logic into the caller simplifies things and allows us to merge the code with IPv4. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv6/xfrm6_input.c | 9 ++++----- net/ipv6/xfrm6_tunnel.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 680739f6900..d8974ca1903 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1058,7 +1058,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); -extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi); +extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); extern int xfrm6_rcv(struct sk_buff *skb); extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 02f69e544f6..596a730294e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include #include -int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; __be32 seq; @@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int nexthdr; unsigned int nhoff; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) @@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6); + nexthdr, AF_INET6); if (x == NULL) goto drop; spin_lock(&x->lock); @@ -135,7 +133,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(skb, 0); + return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6c67ac197ee..fae90ff3108 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.2.3 From 7aa68cb90638ccc36559a936814e4c089892b3d9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:30:07 -0700 Subject: [IPSEC]: Move ip_summed zapping out of xfrm6_rcv_spi Not every transform needs to zap ip_summed. For example, a pure tunnel mode encapsulation does not affect the hardware checksum at all. In fact, every algorithm (that needs this) other than AH6 already does its own ip_summed zapping. This patch moves the zapping into AH6 which is in line with what IPv4 does. Possible future optimisation: Checksum the data as we copy them in IPComp. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 ++ net/ipv6/xfrm6_input.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f9f68916269..a8221d1da0f 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -344,6 +344,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto out; + skb->ip_summed = CHECKSUM_NONE; + hdr_len = skb->data - skb_network_header(skb); ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 596a730294e..b1201c33eb1 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -97,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, xfrm_nr * sizeof(xfrm_vec[0])); skb->sp->len += xfrm_nr; - skb->ip_summed = CHECKSUM_NONE; nf_reset(skb); -- cgit v1.2.3 From 1bfcb10f670f5ff5e1d9f53e59680573524cb142 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:31:50 -0700 Subject: [IPSEC]: Add missing BEET checks Currently BEET mode does not reinject the packet back into the stack like tunnel mode does. Since BEET should behave just like tunnel mode this is incorrect. This patch fixes this by introducing a flags field to xfrm_mode that tells the IPsec code whether it should terminate and reinject the packet back into the stack. It then sets the flag for BEET and tunnel mode. I've also added a number of missing BEET checks elsewhere where we check whether a given mode is a tunnel or not. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_beet.c | 1 + net/ipv4/xfrm4_mode_tunnel.c | 1 + net/ipv4/xfrm4_output.c | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 1 + net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 3 +-- net/ipv6/xfrm6_state.c | 6 ++++-- net/xfrm/xfrm_output.c | 2 +- net/xfrm/xfrm_policy.c | 6 ++++-- 14 files changed, 25 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7f156a0b94c..2143f2911a2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -314,6 +314,12 @@ struct xfrm_mode { struct module *owner; unsigned int encap; + int flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, }; extern int xfrm_register_mode(struct xfrm_mode *mode, int family); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 5cb0b5995bc..bc5dc0747cd 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -94,7 +94,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 73d2338bec5..e42e122414b 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -114,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = { .output = xfrm4_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_beet_init(void) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 1ae9d32276f..e4deecba6dd 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -139,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = { .output = xfrm4_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_tunnel_init(void) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index a4edd666318..dcbc2743069 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb) struct iphdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 329825ca68f..2373d673df6 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch (encap_family) { case AF_INET: diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b1201c33eb1..c6ee1a3ba19 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -71,7 +71,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 13bb1e85676..2bfb4f05c14 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = { .output = xfrm6_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_beet_init(void) diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index ea228387911..fd84e221727 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = { .output = xfrm6_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index a5a32c17249..c9f42d1c2df 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 15aa4c58c31..dc4bdcb55cb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL || - xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index cdadb484746..e644c80515f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { + (src[i]->props.mode == XFRM_MODE_TUNNEL || + src[i]->props.mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { + (src[i]->mode == XFRM_MODE_TUNNEL || + src[i]->mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0eb3377602e..8bf71ba2345 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + } while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL)); err = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ca24c90d379..1d66fb42c9c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1940,7 +1940,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (xdst->genid != dst->xfrm->genid) return 0; - if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + if (strict && fl && + !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; @@ -2291,7 +2292,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) continue; n++; - if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && + pol->xfrm_vec[i].mode != XFRM_MODE_BEET) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, -- cgit v1.2.3 From 17c2a42a24e1e8dd6aa7cea4f84e034ab1bfff31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:33:12 -0700 Subject: [IPSEC]: Store afinfo pointer in xfrm_mode It is convenient to have a pointer from xfrm_state to address-specific functions such as the output function for a family. Currently the address-specific policy code calls out to the xfrm state code to get those pointers when we could get it in an easier way via the state itself. This patch adds an xfrm_state_afinfo to xfrm_mode (since they're address-specific) and changes the policy code to use it. I've also added an owner field to do reference counting on the module providing the afinfo even though it isn't strictly necessary today since IPv6 can't be unloaded yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +++--- net/ipv4/xfrm4_policy.c | 13 +------------ net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_policy.c | 14 +------------- net/ipv6/xfrm6_state.c | 1 + net/xfrm/xfrm_state.c | 26 +++++++++++++++++--------- 6 files changed, 24 insertions(+), 37 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2143f2911a2..f0f3318f655 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -253,7 +253,8 @@ extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid); extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned short family; + unsigned int family; + struct module *owner; struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); @@ -267,8 +268,6 @@ struct xfrm_state_afinfo { extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); -extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -312,6 +311,7 @@ struct xfrm_mode { */ int (*output)(struct xfrm_state *x,struct sk_buff *skb); + struct xfrm_state_afinfo *afinfo; struct module *owner; unsigned int encap; int flags; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2373d673df6..c65b8e03c04 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -169,17 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv6 module can be unloaded, we should manage reference - * to xfrm6_output in afinfo->output. Miyazawa - * */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - err = -EAFNOSUPPORT; - goto error; - } - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->mode->afinfo->output; if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 93e2c061cdd..13d54a1c333 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, + .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .output = xfrm4_output, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dc4bdcb55cb..324268329f6 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -231,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv4 is implemented as module and can be unloaded, - * we should manage reference to xfrm4_output in afinfo->output. - * Miyazawa - */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - goto error; - } - - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index e644c80515f..b392bee396f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -170,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index dc438f2b944..48b4a06b3d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -57,6 +57,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); + static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -289,11 +292,18 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = -EEXIST; modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == NULL)) { - modemap[mode->encap] = mode; - err = 0; - } + if (modemap[mode->encap]) + goto out; + err = -ENOENT; + if (!try_module_get(afinfo->owner)) + goto out; + + mode->afinfo = afinfo; + modemap[mode->encap] = mode; + err = 0; + +out: xfrm_state_unlock_afinfo(afinfo); return err; } @@ -316,6 +326,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) modemap = afinfo->mode_map; if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; + module_put(mode->afinfo->owner); err = 0; } @@ -1869,7 +1880,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1881,14 +1892,11 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } -EXPORT_SYMBOL(xfrm_state_get_afinfo); -EXPORT_SYMBOL(xfrm_state_put_afinfo); - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { -- cgit v1.2.3 From ca68145f16359f71cd62b2671aa3e8c58f45ef19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:15 -0700 Subject: [IPSEC]: Disallow combinations of RO and AH/ESP/IPCOMP Combining RO and AH/ESP/IPCOMP does not make sense. So this patch adds a check in the state initialisation function to prevent this. This allows us to safely remove the mode input function of RO since it can never be called anymore. Indeed, if somehow it does get called we'll know about it through an OOPS instead of it slipping past silently. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 9 ++++++++- net/ipv6/esp6.c | 9 ++++++++- net/ipv6/ipcomp6.c | 9 ++++++++- net/ipv6/xfrm6_mode_ro.c | 9 --------- 4 files changed, 24 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index a8221d1da0f..67cd06613a2 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -477,8 +477,15 @@ static int ah6_init_state(struct xfrm_state *x) x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } x->data = ahp; return 0; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9eb92859835..b0715432e45 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,8 +354,15 @@ static int esp6_init_state(struct xfrm_state *x) (x->ealg->alg_key_len + 7) / 8)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 28fc8edfdc3..80ef2a1d39f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -411,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 957ae36b669..a7bc8c62317 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -58,16 +58,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -/* - * Do nothing about routing optimization header unlike IPsec. - */ -static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return 0; -} - static struct xfrm_mode xfrm6_ro_mode = { - .input = xfrm6_ro_input, .output = xfrm6_ro_output, .owner = THIS_MODULE, .encap = XFRM_MODE_ROUTEOPTIMIZATION, -- cgit v1.2.3 From 13996378e6585fb25e582afe7489bf52dde78deb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:51 -0700 Subject: [IPSEC]: Rename mode to outer_mode and add inner_mode This patch adds a new field to xfrm states called inner_mode. The existing mode object is renamed to outer_mode. This is the first part of an attempt to fix inter-family transforms. As it is we always use the outer family when determining which mode to use. As a result we may end up shoving IPv4 packets into netfilter6 and vice versa. What we really want is to use the inner family for the first part of outbound processing and the outer family for the second part. For inbound processing we'd use the opposite pairing. I've also added a check to prevent silly combinations such as transport mode with inter-family transforms. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/core/pktgen.c | 2 +- net/ipv4/xfrm4_input.c | 4 ++-- net/ipv4/xfrm4_output.c | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_input.c | 4 ++-- net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 18 ++++++++++++++---- 11 files changed, 28 insertions(+), 17 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0f3318f655..688f6f5d328 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -186,7 +186,8 @@ struct xfrm_state /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; - struct xfrm_mode *mode; + struct xfrm_mode *inner_mode; + struct xfrm_mode *outer_mode; /* Security context */ struct xfrm_sec_ctx *security; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2100c734b10..8cae60c5338 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) spin_lock(&x->lock); iph = ip_hdr(skb); - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; err = x->type->output(x, skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bc5dc0747cd..5e95c8a07ef 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -91,10 +91,10 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index dcbc2743069..c4a7156962b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb) struct iphdr *iph; int err; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1f0ea0e0371..cc86fb110dd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -168,7 +168,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->mode->afinfo->output; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; if (rt0->peer) atomic_inc(&rt0->peer->refcnt); x->u.rt.peer = rt0->peer; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c6ee1a3ba19..515783707e8 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -68,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c9f42d1c2df..656976760ad 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 324268329f6..82e27b80d07 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -230,7 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->mode->afinfo->output; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8bf71ba2345..f4bfd6c4565 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -63,7 +63,7 @@ int xfrm_output(struct sk_buff *skb) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; @@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !(x->mode->flags & XFRM_MODE_FLAG_TUNNEL)); + } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); err = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1d66fb42c9c..b702bd8a389 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1941,7 +1941,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (strict && fl && - !(dst->xfrm->mode->flags & XFRM_MODE_FLAG_TUNNEL) && + !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 48b4a06b3d1..224b44e31a0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -377,8 +377,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); - if (x->mode) - xfrm_put_mode(x->mode); + if (x->inner_mode) + xfrm_put_mode(x->inner_mode); + if (x->outer_mode) + xfrm_put_mode(x->outer_mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -1947,6 +1949,14 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; + x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (x->inner_mode == NULL) + goto error; + + if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) + goto error; + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; @@ -1955,8 +1965,8 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; - x->mode = xfrm_get_mode(x->props.mode, family); - if (x->mode == NULL) + x->outer_mode = xfrm_get_mode(x->props.mode, family); + if (x->outer_mode == NULL) goto error; x->km.state = XFRM_STATE_VALID; -- cgit v1.2.3 From 04028045a12ba941c579d0f3238489333ac18ea4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:14:58 -0700 Subject: [IPV6]: Lost locking when inserting a flowlabel in ipv6_fl_list The new flowlabels should be inserted into the sock list under the ip6_sk_fl_lock. This was lost in one place. This list is naturally protected with the socket lock, but the fl6_sock_lookup() is called without it, so another protection is required. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 217d60f9fc8..8550df20f98 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -409,6 +409,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) return 0; } +static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) +{ + write_lock_bh(&ip6_sk_fl_lock); + sfl->fl = fl; + sfl->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl; + write_unlock_bh(&ip6_sk_fl_lock); +} + int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int err; @@ -513,11 +523,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - write_lock_bh(&ip6_sk_fl_lock); - sfl1->fl = fl1; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; - write_unlock_bh(&ip6_sk_fl_lock); + fl_link(np, sfl1, fl1); fl_free(fl); return 0; @@ -545,9 +551,7 @@ release: } } - sfl1->fl = fl; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; + fl_link(np, sfl1, fl); return 0; default: -- cgit v1.2.3 From bd0bf57700cb0eaa92f3d2ee040a69743cdd99d0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:15:57 -0700 Subject: [IPV6]: Lost locking in fl6_sock_lookup This routine scans the ipv6_fl_list whose update is protected with the socket lock and the ip6_sk_fl_lock. Since the socket lock is not taken in the lookup, use the other one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 8550df20f98..f40a08669db 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -190,14 +190,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; + read_lock_bh(&ip6_sk_fl_lock); for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { + read_unlock_bh(&ip6_sk_fl_lock); fl->lastuse = jiffies; atomic_inc(&fl->users); return fl; } } + read_unlock_bh(&ip6_sk_fl_lock); return NULL; } -- cgit v1.2.3 From 78c2e50253569e62caa4a61fc1cc5a0158edec43 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:18:56 -0700 Subject: [IPV6]: Fix race in ipv6_flowlabel_opt() when inserting two labels In the IPV6_FL_A_GET case the hash is checked for flowlabels with the given label. If it is not found, the lock, protecting the hash, is dropped to be re-get for writing. After this a newly allocated entry is inserted, but no checks are performed to catch a classical SMP race, when the conflicting label may be inserted on another cpu. Use the (currently unused) return value from fl_intern() to return the conflicting entry (if found) and re-check, whether we can reuse it (IPV6_FL_F_EXCL) or return -EEXISTS. Also add the comment, about why not re-lookup the current sock for conflicting flowlabel entry. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f40a08669db..e55ae1a1f56 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -154,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static int fl_intern(struct ip6_flowlabel *fl, __be32 label) +static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) { + struct ip6_flowlabel *lfl; + fl->label = label & IPV6_FLOWLABEL_MASK; write_lock_bh(&ip6_fl_lock); @@ -163,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { - struct ip6_flowlabel *lfl; lfl = __fl_lookup(fl->label); if (lfl == NULL) break; } } + } else { + /* + * we dropper the ip6_fl_lock, so this entry could reappear + * and we need to recheck with it. + * + * OTOH no need to search the active socket first, like it is + * done in ipv6_flowlabel_opt - sock is locked, so new entry + * with the same label can only appear on another sock + */ + lfl = __fl_lookup(fl->label); + if (lfl != NULL) { + atomic_inc(&lfl->users); + write_unlock_bh(&ip6_fl_lock); + return lfl; + } } fl->lastuse = jiffies; @@ -176,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) fl_ht[FL_HASH(fl->label)] = fl; atomic_inc(&fl_size); write_unlock_bh(&ip6_fl_lock); - return 0; + return NULL; } @@ -429,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; struct ipv6_fl_socklist *sfl, **sflp; - struct ip6_flowlabel *fl; + struct ip6_flowlabel *fl, *fl1 = NULL; + if (optlen < sizeof(freq)) return -EINVAL; @@ -485,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); if (freq.flr_label) { - struct ip6_flowlabel *fl1 = NULL; - err = -EEXIST; read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { @@ -505,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (fl1 == NULL) fl1 = fl_lookup(freq.flr_label); if (fl1) { +recheck: err = -EEXIST; if (freq.flr_flags&IPV6_FL_F_EXCL) goto release; @@ -543,9 +559,9 @@ release: if (sfl1 == NULL || (err = mem_check(sk)) != 0) goto done; - err = fl_intern(fl, freq.flr_label); - if (err) - goto done; + fl1 = fl_intern(fl, freq.flr_label); + if (fl1 != NULL) + goto recheck; if (!freq.flr_label) { if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, -- cgit v1.2.3 From 52f095ee88d8851866bc7694ab991ca5abf21d5e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:38:48 -0700 Subject: [IPV6]: Fix again the fl6_sock_lookup() fixed locking YOSHIFUJI fairly pointed out, that the users increment should be done under the ip6_sk_fl_lock not to give IPV6_FL_A_PUT a chance to put this count to zero and release the flowlabel. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index e55ae1a1f56..b12cc22e774 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -210,9 +210,9 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { - read_unlock_bh(&ip6_sk_fl_lock); fl->lastuse = jiffies; atomic_inc(&fl->users); + read_unlock_bh(&ip6_sk_fl_lock); return fl; } } -- cgit v1.2.3