diff options
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/Kconfig | 2 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 21 | ||||
| -rw-r--r-- | net/ipv4/fib_trie.c | 6 | ||||
| -rw-r--r-- | net/ipv4/route.c | 60 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 47 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 118 |
6 files changed, 150 insertions, 104 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9d26a3da37e..5b919f7b45d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,7 +408,7 @@ config INET_XFRM_MODE_BEET config INET_LRO bool "Large Receive Offload (ipv4/tcp)" - + default y ---help--- Support for Large Receive Offload (ipv4/tcp). diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 170689681aa..5abee4c9744 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,13 +1246,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff **pp = NULL; struct sk_buff *p; struct iphdr *iph; + unsigned int hlen; + unsigned int off; + unsigned int id; int flush = 1; int proto; - int id; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } proto = iph->protocol & (MAX_INET_PROTOS - 1); @@ -1267,9 +1274,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - flush = ntohs(iph->tot_len) != skb_gro_len(skb) || - iph->frag_off != htons(IP_DF); - id = ntohs(iph->id); + id = ntohl(*(u32 *)&iph->id); + flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + id >>= 16; for (p = *head; p; p = p->next) { struct iphdr *iph2; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9070d11058e..538d2a9a511 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068..28205e5bfa9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,8 +784,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + struct rtable *rth, *aux, **rthp; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,8 +809,10 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -819,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1068,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1088,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1134,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1205,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1224,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7a0f0b27bf1..17b89c523f9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -439,12 +439,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { + struct sk_buff *skb; + answ = tp->rcv_nxt - tp->copied_seq; /* Subtract 1, if FIN is in queue. */ - if (answ && !skb_queue_empty(&sk->sk_receive_queue)) - answ -= - tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin; + skb = skb_peek_tail(&sk->sk_receive_queue); + if (answ && skb) + answ -= tcp_hdr(skb)->fin; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); @@ -1382,11 +1384,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Next get a buffer. */ - skb = skb_peek(&sk->sk_receive_queue); - do { - if (!skb) - break; - + skb_queue_walk(&sk->sk_receive_queue, skb) { /* Now that we have two receive queues this * shouldn't happen. */ @@ -1403,8 +1401,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (tcp_hdr(skb)->fin) goto found_fin_ok; WARN_ON(!(flags & MSG_PEEK)); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + } /* Well, if we have backlog, try to process it now yet. */ @@ -2518,20 +2515,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int thlen; unsigned int flags; unsigned int mss = 1; + unsigned int hlen; + unsigned int off; int flush = 1; int i; - th = skb_gro_header(skb, sizeof(*th)); - if (unlikely(!th)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; - th = skb_gro_header(skb, thlen); - if (unlikely(!th)) - goto out; + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } skb_gro_pull(skb, thlen); @@ -2544,7 +2551,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) th2 = tcp_hdr(p); - if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) { + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -2559,14 +2566,14 @@ found: flush |= flags & TCP_FLAG_CWR; flush |= (flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window); - for (i = sizeof(*th); !flush && i < thlen; i += 4) + flush |= th->ack_seq ^ th2->ack_seq; + for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); mss = skb_shinfo(p)->gso_size; - flush |= (len > mss) | !len; + flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eeb8a92aa41..ba34a23c1bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4426,7 +4426,7 @@ drop: } __skb_queue_head(&tp->out_of_order_queue, skb); } else { - struct sk_buff *skb1 = tp->out_of_order_queue.prev; + struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); u32 seq = TCP_SKB_CB(skb)->seq; u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -4443,15 +4443,18 @@ drop: } /* Find place to insert this segment. */ - do { + while (1) { if (!after(TCP_SKB_CB(skb1)->seq, seq)) break; - } while ((skb1 = skb1->prev) != - (struct sk_buff *)&tp->out_of_order_queue); + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { + skb1 = NULL; + break; + } + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + } /* Do skb overlap to previous one? */ - if (skb1 != (struct sk_buff *)&tp->out_of_order_queue && - before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ __kfree_skb(skb); @@ -4463,24 +4466,41 @@ drop: tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); } else { - skb1 = skb1->prev; + if (skb_queue_is_first(&tp->out_of_order_queue, + skb1)) + skb1 = NULL; + else + skb1 = skb_queue_prev( + &tp->out_of_order_queue, + skb1); } } - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + if (!skb1) + __skb_queue_head(&tp->out_of_order_queue, skb); + else + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ - while ((skb1 = skb->next) != - (struct sk_buff *)&tp->out_of_order_queue && - after(end_seq, TCP_SKB_CB(skb1)->seq)) { - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + if (skb1 && !skb_queue_is_last(&tp->out_of_order_queue, skb1)) { + struct sk_buff *n; + + skb1 = skb_queue_next(&tp->out_of_order_queue, skb1); + skb_queue_walk_from_safe(&tp->out_of_order_queue, + skb1, n) { + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) + break; + if (before(end_seq, + TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + end_seq); + break; + } + __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - end_seq); - break; + TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); } - __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); } add_sack: @@ -4492,7 +4512,10 @@ add_sack: static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(list, skb)) + next = skb_queue_next(list, skb); __skb_unlink(skb, list); __kfree_skb(skb); @@ -4503,6 +4526,9 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. + * + * If tail is NULL, this means until the end of the list. + * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ @@ -4511,15 +4537,23 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb; + struct sk_buff *skb, *n; + bool end_of_skbs; /* First, check that queue is collapsible and find * the point where collapsing can be useful. */ - for (skb = head; skb != tail;) { + skb = head; +restart: + end_of_skbs = true; + skb_queue_walk_from_safe(list, skb, n) { + if (skb == tail) + break; /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - continue; + if (!skb) + break; + goto restart; } /* The first skb to collapse is: @@ -4529,16 +4563,24 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, */ if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && (tcp_win_from_space(skb->truesize) > skb->len || - before(TCP_SKB_CB(skb)->seq, start) || - (skb->next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq))) + before(TCP_SKB_CB(skb)->seq, start))) { + end_of_skbs = false; break; + } + + if (!skb_queue_is_last(list, skb)) { + struct sk_buff *next = skb_queue_next(list, skb); + if (next != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { + end_of_skbs = false; + break; + } + } /* Decided to skip this, advance start seq. */ start = TCP_SKB_CB(skb)->end_seq; - skb = skb->next; } - if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) + if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; while (before(start, end)) { @@ -4583,7 +4625,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - if (skb == tail || + if (!skb || + skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; @@ -4610,17 +4653,21 @@ static void tcp_collapse_ofo_queue(struct sock *sk) head = skb; for (;;) { - skb = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) + next = skb_queue_next(&tp->out_of_order_queue, skb); + skb = next; /* Segment is terminated when we see gap or when * we are at the end of all the queue. */ - if (skb == (struct sk_buff *)&tp->out_of_order_queue || + if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { tcp_collapse(sk, &tp->out_of_order_queue, head, skb, start, end); head = skb; - if (skb == (struct sk_buff *)&tp->out_of_order_queue) + if (!skb) break; /* Start new segment */ start = TCP_SKB_CB(skb)->seq; @@ -4681,10 +4728,11 @@ static int tcp_prune_queue(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, &sk->sk_receive_queue, - sk->sk_receive_queue.next, - (struct sk_buff *)&sk->sk_receive_queue, - tp->copied_seq, tp->rcv_nxt); + if (!skb_queue_empty(&sk->sk_receive_queue)) + tcp_collapse(sk, &sk->sk_receive_queue, + skb_peek(&sk->sk_receive_queue), + NULL, + tp->copied_seq, tp->rcv_nxt); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) |
