diff options
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- | include/net/tcp.h | 526 |
1 files changed, 316 insertions, 210 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index 19f4150f4d4d..38509f047382 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -30,6 +30,7 @@ #include <linux/dmaengine.h> #include <linux/crypto.h> #include <linux/cryptohash.h> +#include <linux/kref.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> @@ -59,12 +60,12 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define MAX_TCP_WINDOW 32767U +/* Offer an initial receive window of 10 mss. */ +#define TCP_DEFAULT_INIT_RCVWND 10 + /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U -/* Minimal RCV_MSS. */ -#define TCP_MIN_RCVMSS 536U - /* The least MTU to use for probing */ #define TCP_BASE_MSS 512 @@ -102,12 +103,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minimum */ - -#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned - * socket. 7 is ~50sec-16min. - */ - - #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN @@ -167,6 +162,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ /* * TCP option lengths @@ -177,6 +173,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ +#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ +#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) +#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -193,6 +193,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ +/* TCP thin-stream limits */ +#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -218,7 +221,7 @@ extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; extern int sysctl_tcp_dsack; -extern int sysctl_tcp_mem[3]; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -237,8 +240,11 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; +extern int sysctl_tcp_cookie_size; +extern int sysctl_tcp_thin_linear_timeouts; +extern int sysctl_tcp_thin_dupack; -extern atomic_t tcp_memory_allocated; +extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; @@ -259,11 +265,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ @@ -285,46 +301,33 @@ extern struct proto tcp_prot; #define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) - -extern void tcp_v4_err(struct sk_buff *skb, u32); - -extern void tcp_shutdown (struct sock *sk, int how); - -extern int tcp_v4_rcv(struct sk_buff *skb); - -extern int tcp_v4_remember_stamp(struct sock *sk); - -extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); - -extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size); -extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); - -extern int tcp_ioctl(struct sock *sk, - int cmd, - unsigned long arg); - -extern int tcp_rcv_state_process(struct sock *sk, - struct sk_buff *skb, - struct tcphdr *th, - unsigned len); - -extern int tcp_rcv_established(struct sock *sk, - struct sk_buff *skb, - struct tcphdr *th, - unsigned len); - -extern void tcp_rcv_space_adjust(struct sock *sk); - -extern void tcp_cleanup_rbuf(struct sock *sk, int copied); - -extern int tcp_twsk_unique(struct sock *sk, - struct sock *sktw, void *twp); - -extern void tcp_twsk_destructor(struct sock *sk); - -extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, unsigned int flags); +#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) + +extern void tcp_v4_err(struct sk_buff *skb, u32); + +extern void tcp_shutdown (struct sock *sk, int how); + +extern int tcp_v4_rcv(struct sk_buff *skb); + +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern void *tcp_v4_tw_get_peer(struct sock *sk); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); +extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags); +extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); +extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len); +extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len); +extern void tcp_rcv_space_adjust(struct sock *sk); +extern void tcp_cleanup_rbuf(struct sock *sk, int copied); +extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); +extern void tcp_twsk_destructor(struct sock *sk); +extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) @@ -341,13 +344,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - -static inline void tcp_clear_options(struct tcp_options_received *rx_opt) -{ - rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; -} - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 @@ -359,8 +355,7 @@ TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) inet_rsk(req)->ecn_ok = 1; } -enum tcp_tw_status -{ +enum tcp_tw_status { TCP_TW_SUCCESS = 0, TCP_TW_RST = 1, TCP_TW_ACK = 2, @@ -368,84 +363,59 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, - struct sk_buff *skb, - const struct tcphdr *th); - -extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); -extern int tcp_child_process(struct sock *parent, - struct sock *child, - struct sk_buff *skb); -extern int tcp_use_frto(struct sock *sk); -extern void tcp_enter_frto(struct sock *sk); -extern void tcp_enter_loss(struct sock *sk, int how); -extern void tcp_clear_retrans(struct tcp_sock *tp); -extern void tcp_update_metrics(struct sock *sk); - -extern void tcp_close(struct sock *sk, - long timeout); -extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); - -extern int tcp_getsockopt(struct sock *sk, int level, - int optname, - char __user *optval, - int __user *optlen); -extern int tcp_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int optlen); -extern int compat_tcp_getsockopt(struct sock *sk, - int level, int optname, - char __user *optval, int __user *optlen); -extern int compat_tcp_setsockopt(struct sock *sk, - int level, int optname, - char __user *optval, int optlen); -extern void tcp_set_keepalive(struct sock *sk, int val); -extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, - size_t len, int nonblock, - int flags, int *addr_len); - -extern void tcp_parse_options(struct sk_buff *skb, - struct tcp_options_received *opt_rx, - int estab); - -extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, + struct sk_buff *skb, + const struct tcphdr *th); +extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); +extern int tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +extern int tcp_use_frto(struct sock *sk); +extern void tcp_enter_frto(struct sock *sk); +extern void tcp_enter_loss(struct sock *sk, int how); +extern void tcp_clear_retrans(struct tcp_sock *tp); +extern void tcp_update_metrics(struct sock *sk); +extern void tcp_close(struct sock *sk, long timeout); +extern unsigned int tcp_poll(struct file * file, struct socket *sock, + struct poll_table_struct *wait); +extern int tcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +extern int tcp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +extern void tcp_set_keepalive(struct sock *sk, int val); +extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); +extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, int *addr_len); +extern void tcp_parse_options(struct sk_buff *skb, + struct tcp_options_received *opt_rx, u8 **hvpp, + int estab); +extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); /* * TCP v4 functions exported for the inet6 API */ -extern void tcp_v4_send_check(struct sock *sk, int len, - struct sk_buff *skb); - -extern int tcp_v4_conn_request(struct sock *sk, - struct sk_buff *skb); - -extern struct sock * tcp_create_openreq_child(struct sock *sk, - struct request_sock *req, - struct sk_buff *skb); - -extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); - -extern int tcp_v4_do_rcv(struct sock *sk, +extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); +extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); +extern struct sock * tcp_create_openreq_child(struct sock *sk, + struct request_sock *req, struct sk_buff *skb); - -extern int tcp_v4_connect(struct sock *sk, - struct sockaddr *uaddr, - int addr_len); - -extern int tcp_connect(struct sock *sk); - -extern struct sk_buff * tcp_make_synack(struct sock *sk, - struct dst_entry *dst, - struct request_sock *req); - -extern int tcp_disconnect(struct sock *sk, int flags); +extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); +extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len); +extern int tcp_connect(struct sock *sk); +extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp); +extern int tcp_disconnect(struct sock *sk, int flags); /* From syncookies.c */ @@ -456,7 +426,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); extern __u32 cookie_init_timestamp(struct request_sock *req); -extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt); +extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *); /* From net/ipv6/syncookies.c */ extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); @@ -469,6 +439,7 @@ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); +extern void tcp_retransmit_timer(struct sock *sk); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); extern int tcp_trim_head(struct sock *, struct sk_buff *, u32); @@ -476,10 +447,10 @@ extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int); extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); -extern int tcp_write_wakeup(struct sock *); +extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); -extern int tcp_send_synack(struct sock *); +extern int tcp_send_synack(struct sock *); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); @@ -500,8 +471,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. + * + * On the other hand, for extremely large MSS devices, handling + * smaller than MSS windows in this way does make sense. + */ + if (tp->max_window >= 512) + cutoff = (tp->max_window >> 1); + else + cutoff = tp->max_window; + + if (cutoff && pktsize > cutoff) + return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } @@ -521,6 +506,17 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); extern int tcp_mss_to_mtu(struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); +static inline void tcp_bound_rto(const struct sock *sk) +{ + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; +} + +static inline u32 __tcp_set_rto(const struct tcp_sock *tp) +{ + return (tp->srtt >> 3) + tp->rttvar; +} + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | @@ -572,7 +568,7 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp) * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. */ -extern u32 __tcp_select_window(struct sock *sk); +extern u32 __tcp_select_window(struct sock *sk); /* TCP timestamps are only 32-bits, this causes a slight * complication on 64-bit systems since we store a snapshot @@ -582,12 +578,22 @@ extern u32 __tcp_select_window(struct sock *sk); */ #define tcp_time_stamp ((__u32)(jiffies)) +#define tcp_flag_byte(th) (((u_int8_t *)th)[13]) + +#define TCPHDR_FIN 0x01 +#define TCPHDR_SYN 0x02 +#define TCPHDR_RST 0x04 +#define TCPHDR_PSH 0x08 +#define TCPHDR_ACK 0x10 +#define TCPHDR_URG 0x20 +#define TCPHDR_ECE 0x40 +#define TCPHDR_CWR 0x80 + /* This is what the send packet queuing engine uses to pass - * TCP per-packet control information to the transmission - * code. We also store the host-order sequence numbers in - * here too. This is 36 bytes on 32-bit architectures, - * 40 bytes on 64-bit machines, if this grows please adjust - * skbuff.h:skbuff->cb[xxx] size appropriately. + * TCP per-packet control information to the transmission code. + * We also store the host-order sequence numbers in here too. + * This is 44 bytes if IPV6 is enabled. + * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately. */ struct tcp_skb_cb { union { @@ -600,19 +606,6 @@ struct tcp_skb_cb { __u32 end_seq; /* SEQ + FIN + SYN + datalen */ __u32 when; /* used to compute rtt's */ __u8 flags; /* TCP header flags. */ - - /* NOTE: These must match up to the flags byte in a - * real TCP header. - */ -#define TCPCB_FLAG_FIN 0x01 -#define TCPCB_FLAG_SYN 0x02 -#define TCPCB_FLAG_RST 0x04 -#define TCPCB_FLAG_PSH 0x08 -#define TCPCB_FLAG_ACK 0x10 -#define TCPCB_FLAG_URG 0x20 -#define TCPCB_FLAG_ECE 0x40 -#define TCPCB_FLAG_CWR 0x80 - __u8 sacked; /* State flags for SACK/FACK. */ #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ @@ -781,6 +774,13 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } +#define TCP_INFINITE_SSTHRESH 0x7fffffff + +static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) +{ + return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. @@ -799,6 +799,15 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); @@ -837,13 +846,6 @@ static inline void tcp_check_probe_timer(struct sock *sk) icsk->icsk_rto, TCP_RTO_MAX); } -static inline void tcp_push_pending_frames(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); -} - static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; @@ -920,7 +922,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -953,7 +955,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale); + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd); static inline int tcp_win_from_space(int space) { @@ -1007,6 +1010,19 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } +static inline int keepalive_probes(const struct tcp_sock *tp) +{ + return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; +} + +static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) +{ + const struct inet_connection_sock *icsk = &tp->inet_conn; + + return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, + tcp_time_stamp - tp->rcv_tstamp); +} + static inline int tcp_fin_time(const struct sock *sk) { int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; @@ -1025,7 +1041,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, return 1; if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) return 1; - + /* + * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, + * then following tcp messages have valid values. Ignore 0 value, + * or else 'negative' tsval might forbid us to accept their packets. + */ + if (!rx_opt->ts_recent) + return 1; return 0; } @@ -1139,25 +1161,15 @@ struct tcp_md5sig_pool { union tcp_md5sum_block md5_blk; }; -#define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */ - /* - functions */ -extern int tcp_v4_md5_hash_skb(char *md5_hash, - struct tcp_md5sig_key *key, - struct sock *sk, - struct request_sock *req, - struct sk_buff *skb); - -extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk); - -extern int tcp_v4_md5_do_add(struct sock *sk, - __be32 addr, - u8 *newkey, - u8 newkeylen); - -extern int tcp_v4_md5_do_del(struct sock *sk, - __be32 addr); +extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, struct request_sock *req, + struct sk_buff *skb); +extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk, + struct sock *addr_sk); +extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey, + u8 newkeylen); +extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr); #ifdef CONFIG_TCP_MD5SIG #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_keylen ? \ @@ -1169,33 +1181,18 @@ extern int tcp_v4_md5_do_del(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void); -extern void tcp_free_md5sig_pool(void); +extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); +extern void tcp_free_md5sig_pool(void); + +extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +extern void tcp_put_md5sig_pool(void); -extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); -extern void __tcp_put_md5sig_pool(void); extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key); -static inline -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) -{ - int cpu = get_cpu(); - struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); - if (!ret) - put_cpu(); - return ret; -} - -static inline void tcp_put_md5sig_pool(void) -{ - __tcp_put_md5sig_pool(); - put_cpu(); -} - /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { @@ -1204,6 +1201,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); + tcp_clear_all_retrans_hints(tcp_sk(sk)); } static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) @@ -1317,6 +1315,15 @@ static inline int tcp_write_queue_empty(struct sock *sk) return skb_queue_empty(&sk->sk_write_queue); } +static inline void tcp_push_pending_frames(struct sock *sk) +{ + if (tcp_send_head(sk)) { + struct tcp_sock *tp = tcp_sk(sk); + + __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); + } +} + /* Start sequence of the highest skb with SACKed bit, valid only if * sacked > 0 or when the caller has ensured validity by itself. */ @@ -1356,6 +1363,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk, tcp_sk(sk)->highest_sack = new; } +/* Determines whether this is a thin stream (which may suffer from + * increased latency). Used to trigger latency-reducing mechanisms. + */ +static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp) +{ + return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, @@ -1376,7 +1391,8 @@ struct tcp_iter_state { sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; - int bucket, sbucket, num, uid; + int bucket, offset, sbucket, num, uid; + loff_t last_pos; }; extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); @@ -1397,7 +1413,7 @@ extern int tcp_gro_complete(struct sk_buff *skb); extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS -extern int tcp4_proc_init(void); +extern int tcp4_proc_init(void); extern void tcp4_proc_exit(void); #endif @@ -1425,9 +1441,99 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); + int (*calc_md5_hash) (char *location, + struct tcp_md5sig_key *md5, + struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); #endif }; +/* Using SHA1 for now, define some constants. + */ +#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) +#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) +#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) + +extern int tcp_cookie_generator(u32 *bakery); + +/** + * struct tcp_cookie_values - each socket needs extra space for the + * cookies, together with (optional) space for any SYN data. + * + * A tcp_sock contains a pointer to the current value, and this is + * cloned to the tcp_timewait_sock. + * + * @cookie_pair: variable data from the option exchange. + * + * @cookie_desired: user specified tcpct_cookie_desired. Zero + * indicates default (sysctl_tcp_cookie_size). + * After cookie sent, remembers size of cookie. + * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. + * + * @s_data_desired: user specified tcpct_s_data_desired. When the + * constant payload is specified (@s_data_constant), + * holds its length instead. + * Range 0 to TCP_MSS_DESIRED. + * + * @s_data_payload: constant data that is to be included in the + * payload of SYN or SYNACK segments when the + * cookie option is present. + */ +struct tcp_cookie_values { + struct kref kref; + u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; + u8 cookie_pair_size; + u8 cookie_desired; + u16 s_data_desired:11, + s_data_constant:1, + s_data_in:1, + s_data_out:1, + s_data_unused:2; + u8 s_data_payload[0]; +}; + +static inline void tcp_cookie_values_release(struct kref *kref) +{ + kfree(container_of(kref, struct tcp_cookie_values, kref)); +} + +/* The length of constant payload data. Note that s_data_desired is + * overloaded, depending on s_data_constant: either the length of constant + * data (returned here) or the limit on variable data. + */ +static inline int tcp_s_data_size(const struct tcp_sock *tp) +{ + return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) + ? tp->cookie_values->s_data_desired + : 0; +} + +/** + * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. + * + * As tcp_request_sock has already been extended in other places, the + * only remaining method is to pass stack values along as function + * parameters. These parameters are not needed after sending SYNACK. + * + * @cookie_bakery: cryptographic secret and message workspace. + * + * @cookie_plus: bytes in authenticator/cookie option, copied from + * struct tcp_options_received (above). + */ +struct tcp_extend_values { + struct request_values rv; + u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; + u8 cookie_plus:6, + cookie_out_never:1, + cookie_in_always:1; +}; + +static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) +{ + return (struct tcp_extend_values *)rvp; +} + extern void tcp_v4_init(void); extern void tcp_init(void); |