From 753eab76a3337863a0d86ce045fa4eb6c3cbeef9 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 22 Nov 2006 20:11:42 -0800 Subject: [UDP]: Make udp_encap_rcv use pskb_may_pull Make udp_encap_rcv use pskb_may_pull IPsec with NAT-T breaks on some notebooks using the latest e1000 chipset, when header split is enabled. When receiving sufficiently large packets, the driver puts everything up to and including the UDP header into the header portion of the skb, and the rest goes into the paged part. udp_encap_rcv forgets to use pskb_may_pull, and fails to decapsulate it. Instead, it passes it up it to the IKE daemon. Signed-off-by: Olaf Kirch Signed-off-by: Jean Delvare Signed-off-by: David S. Miller --- net/ipv4/udp.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 865d75214a9..9e1bd374875 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -928,23 +928,32 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) return 1; #else struct udp_sock *up = udp_sk(sk); - struct udphdr *uh = skb->h.uh; + struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); - __be32 *udpdata32 = (__be32 *)udpdata; + __u8 *udpdata; + __be32 *udpdata32; __u16 encap_type = up->encap_type; /* if we're overly short, let UDP handle it */ - if (udpdata > skb->tail) + len = skb->len - sizeof(struct udphdr); + if (len <= 0) return 1; /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; - len = skb->tail - udpdata; + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = skb->h.uh; + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; switch (encap_type) { default: -- cgit v1.2.3 From ba4e58eca8aa9473b44fdfd312f26c4a2e7798b3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 11:10:57 -0800 Subject: [NET]: Supporting UDP-Lite (RFC 3828) in Linux This is a revision of the previously submitted patch, which alters the way files are organized and compiled in the following manner: * UDP and UDP-Lite now use separate object files * source file dependencies resolved via header files net/ipv{4,6}/udp_impl.h * order of inclusion files in udp.c/udplite.c adapted accordingly [NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828) This patch adds support for UDP-Lite to the IPv4 stack, provided as an extension to the existing UDPv4 code: * generic routines are all located in net/ipv4/udp.c * UDP-Lite specific routines are in net/ipv4/udplite.c * MIB/statistics support in /proc/net/snmp and /proc/net/udplite * shared API with extensions for partial checksum coverage [NET/IPv6]: Extension for UDP-Lite over IPv6 It extends the existing UDPv6 code base with support for UDP-Lite in the same manner as per UDPv4. In particular, * UDPv6 generic and shared code is in net/ipv6/udp.c * UDP-Litev6 specific extensions are in net/ipv6/udplite.c * MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6 * support for IPV6_ADDRFORM * aligned the coding style of protocol initialisation with af_inet6.c * made the error handling in udpv6_queue_rcv_skb consistent; to return `-1' on error on all error cases * consolidation of shared code [NET]: UDP-Lite Documentation and basic XFRM/Netfilter support The UDP-Lite patch further provides * API documentation for UDP-Lite * basic xfrm support * basic netfilter support for IPv4 and IPv6 (LOG target) Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- Documentation/networking/udplite.txt | 281 +++++++++++++++++++ include/linux/in.h | 1 + include/linux/socket.h | 1 + include/linux/udp.h | 12 + include/net/ipv6.h | 12 +- include/net/transp_v6.h | 2 + include/net/udp.h | 91 +++++- include/net/udplite.h | 149 ++++++++++ include/net/xfrm.h | 2 + net/ipv4/Makefile | 3 +- net/ipv4/af_inet.c | 8 +- net/ipv4/netfilter/ipt_LOG.c | 11 +- net/ipv4/proc.c | 13 + net/ipv4/udp.c | 518 ++++++++++++++++++++--------------- net/ipv4/udp_impl.h | 38 +++ net/ipv4/udplite.c | 119 ++++++++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/Makefile | 4 +- net/ipv6/af_inet6.c | 21 +- net/ipv6/ipv6_sockglue.c | 11 +- net/ipv6/netfilter/ip6t_LOG.c | 10 +- net/ipv6/proc.c | 11 + net/ipv6/udp.c | 361 +++++++++++++----------- net/ipv6/udp_impl.h | 34 +++ net/ipv6/udplite.c | 105 +++++++ net/ipv6/xfrm6_policy.c | 1 + net/netfilter/xt_multiport.c | 5 +- net/netfilter/xt_tcpudp.c | 20 +- 28 files changed, 1442 insertions(+), 403 deletions(-) create mode 100644 Documentation/networking/udplite.txt create mode 100644 include/net/udplite.h create mode 100644 net/ipv4/udp_impl.h create mode 100644 net/ipv4/udplite.c create mode 100644 net/ipv6/udp_impl.h create mode 100644 net/ipv6/udplite.c (limited to 'net/ipv4/udp.c') diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt new file mode 100644 index 00000000000..dd6f46b83da --- /dev/null +++ b/Documentation/networking/udplite.txt @@ -0,0 +1,281 @@ + =========================================================================== + The UDP-Lite protocol (RFC 3828) + =========================================================================== + + + UDP-Lite is a Standards-Track IETF transport protocol whose characteristic + is a variable-length checksum. This has advantages for transport of multimedia + (video, VoIP) over wireless networks, as partly damaged packets can still be + fed into the codec instead of being discarded due to a failed checksum test. + + This file briefly describes the existing kernel support and the socket API. + For in-depth information, you can consult: + + o The UDP-Lite Homepage: http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/ + Fom here you can also download some example application source code. + + o The UDP-Lite HOWTO on + http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/files/UDP-Lite-HOWTO.txt + + o The Wireshark UDP-Lite WiKi (with capture files): + http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol + + o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt + + + I) APPLICATIONS + + Several applications have been ported successfully to UDP-Lite. Ethereal + (now called wireshark) has UDP-Litev4/v6 support by default. The tarball on + + http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/files/udplite_linux.tar.gz + + has source code for several v4/v6 client-server and network testing examples. + + Porting applications to UDP-Lite is straightforward: only socket level and + IPPROTO need to be changed; senders additionally set the checksum coverage + length (default = header length = 8). Details are in the next section. + + + II) PROGRAMMING API + + UDP-Lite provides a connectionless, unreliable datagram service and hence + uses the same socket type as UDP. In fact, porting from UDP to UDP-Lite is + very easy: simply add `IPPROTO_UDPLITE' as the last argument of the socket(2) + call so that the statement looks like: + + s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE); + + or, respectively, + + s = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE); + + With just the above change you are able to run UDP-Lite services or connect + to UDP-Lite servers. The kernel will assume that you are not interested in + using partial checksum coverage and so emulate UDP mode (full coverage). + + To make use of the partial checksum coverage facilities requires setting a + single socket option, which takes an integer specifying the coverage length: + + * Sender checksum coverage: UDPLITE_SEND_CSCOV + + For example, + + int val = 20; + setsockopt(s, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &val, sizeof(int)); + + sets the checksum coverage length to 20 bytes (12b data + 8b header). + Of each packet only the first 20 bytes (plus the pseudo-header) will be + checksummed. This is useful for RTP applications which have a 12-byte + base header. + + + * Receiver checksum coverage: UDPLITE_RECV_CSCOV + + This option is the receiver-side analogue. It is truly optional, i.e. not + required to enable traffic with partial checksum coverage. Its function is + that of a traffic filter: when enabled, it instructs the kernel to drop + all packets which have a coverage _less_ than this value. For example, if + RTP and UDP headers are to be protected, a receiver can enforce that only + packets with a minimum coverage of 20 are admitted: + + int min = 20; + setsockopt(s, SOL_UDPLITE, UDPLITE_RECV_CSCOV, &min, sizeof(int)); + + The calls to getsockopt(2) are analogous. Being an extension and not a stand- + alone protocol, all socket options known from UDP can be used in exactly the + same manner as before, e.g. UDP_CORK or UDP_ENCAP. + + A detailed discussion of UDP-Lite checksum coverage options is in section IV. + + + III) HEADER FILES + + The socket API requires support through header files in /usr/include: + + * /usr/include/netinet/in.h + to define IPPROTO_UDPLITE + + * /usr/include/netinet/udplite.h + for UDP-Lite header fields and protocol constants + + For testing purposes, the following can serve as a `mini' header file: + + #define IPPROTO_UDPLITE 136 + #define SOL_UDPLITE 136 + #define UDPLITE_SEND_CSCOV 10 + #define UDPLITE_RECV_CSCOV 11 + + Ready-made header files for various distros are in the UDP-Lite tarball. + + + IV) KERNEL BEHAVIOUR WITH REGARD TO THE VARIOUS SOCKET OPTIONS + + To enable debugging messages, the log level need to be set to 8, as most + messages use the KERN_DEBUG level (7). + + 1) Sender Socket Options + + If the sender specifies a value of 0 as coverage length, the module + assumes full coverage, transmits a packet with coverage length of 0 + and according checksum. If the sender specifies a coverage < 8 and + different from 0, the kernel assumes 8 as default value. Finally, + if the specified coverage length exceeds the packet length, the packet + length is used instead as coverage length. + + 2) Receiver Socket Options + + The receiver specifies the minimum value of the coverage length it + is willing to accept. A value of 0 here indicates that the receiver + always wants the whole of the packet covered. In this case, all + partially covered packets are dropped and an error is logged. + + It is not possible to specify illegal values (<0 and <8); in these + cases the default of 8 is assumed. + + All packets arriving with a coverage value less than the specified + threshold are discarded, these events are also logged. + + 3) Disabling the Checksum Computation + + On both sender and receiver, checksumming will always be performed + and can not be disabled using SO_NO_CHECK. Thus + + setsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, ... ); + + will always will be ignored, while the value of + + getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...); + + is meaningless (as in TCP). Packets with a zero checksum field are + illegal (cf. RFC 3828, sec. 3.1) will be silently discarded. + + 4) Fragmentation + + The checksum computation respects both buffersize and MTU. The size + of UDP-Lite packets is determined by the size of the send buffer. The + minimum size of the send buffer is 2048 (defined as SOCK_MIN_SNDBUF + in include/net/sock.h), the default value is configurable as + net.core.wmem_default or via setting the SO_SNDBUF socket(7) + option. The maximum upper bound for the send buffer is determined + by net.core.wmem_max. + + Given a payload size larger than the send buffer size, UDP-Lite will + split the payload into several individual packets, filling up the + send buffer size in each case. + + The precise value also depends on the interface MTU. The interface MTU, + in turn, may trigger IP fragmentation. In this case, the generated + UDP-Lite packet is split into several IP packets, of which only the + first one contains the L4 header. + + The send buffer size has implications on the checksum coverage length. + Consider the following example: + + Payload: 1536 bytes Send Buffer: 1024 bytes + MTU: 1500 bytes Coverage Length: 856 bytes + + UDP-Lite will ship the 1536 bytes in two separate packets: + + Packet 1: 1024 payload + 8 byte header + 20 byte IP header = 1052 bytes + Packet 2: 512 payload + 8 byte header + 20 byte IP header = 540 bytes + + The coverage packet covers the UDP-Lite header and 848 bytes of the + payload in the first packet, the second packet is fully covered. Note + that for the second packet, the coverage length exceeds the packet + length. The kernel always re-adjusts the coverage length to the packet + length in such cases. + + As an example of what happens when one UDP-Lite packet is split into + several tiny fragments, consider the following example. + + Payload: 1024 bytes Send buffer size: 1024 bytes + MTU: 300 bytes Coverage length: 575 bytes + + +-+-----------+--------------+--------------+--------------+ + |8| 272 | 280 | 280 | 280 | + +-+-----------+--------------+--------------+--------------+ + 280 560 840 1032 + ^ + *****checksum coverage************* + + The UDP-Lite module generates one 1032 byte packet (1024 + 8 byte + header). According to the interface MTU, these are split into 4 IP + packets (280 byte IP payload + 20 byte IP header). The kernel module + sums the contents of the entire first two packets, plus 15 bytes of + the last packet before releasing the fragments to the IP module. + + To see the analogous case for IPv6 fragmentation, consider a link + MTU of 1280 bytes and a write buffer of 3356 bytes. If the checksum + coverage is less than 1232 bytes (MTU minus IPv6/fragment header + lengths), only the first fragment needs to be considered. When using + larger checksum coverage lengths, each eligible fragment needs to be + checksummed. Suppose we have a checksum coverage of 3062. The buffer + of 3356 bytes will be split into the following fragments: + + Fragment 1: 1280 bytes carrying 1232 bytes of UDP-Lite data + Fragment 2: 1280 bytes carrying 1232 bytes of UDP-Lite data + Fragment 3: 948 bytes carrying 900 bytes of UDP-Lite data + + The first two fragments have to be checksummed in full, of the last + fragment only 598 (= 3062 - 2*1232) bytes are checksummed. + + While it is important that such cases are dealt with correctly, they + are (annoyingly) rare: UDP-Lite is designed for optimising multimedia + performance over wireless (or generally noisy) links and thus smaller + coverage lenghts are likely to be expected. + + + V) UDP-LITE RUNTIME STATISTICS AND THEIR MEANING + + Exceptional and error conditions are logged to syslog at the KERN_DEBUG + level. Live statistics about UDP-Lite are available in /proc/net/snmp + and can (with newer versions of netstat) be viewed using + + netstat -svu + + This displays UDP-Lite statistics variables, whose meaning is as follows. + + InDatagrams: Total number of received datagrams. + + NoPorts: Number of packets received to an unknown port. + These cases are counted separately (not as InErrors). + + InErrors: Number of erroneous UDP-Lite packets. Errors include: + * internal socket queue receive errors + * packet too short (less than 8 bytes or stated + coverage length exceeds received length) + * xfrm4_policy_check() returned with error + * application has specified larger min. coverage + length than that of incoming packet + * checksum coverage violated + * bad checksum + + OutDatagrams: Total number of sent datagrams. + + These statistics derive from the UDP MIB (RFC 2013). + + + VI) IPTABLES + + There is packet match support for UDP-Lite as well as support for the LOG target. + If you copy and paste the following line into /etc/protcols, + + udplite 136 UDP-Lite # UDP-Lite [RFC 3828] + + then + iptables -A INPUT -p udplite -j LOG + + will produce logging output to syslog. Dropping and rejecting packets also works. + + + VII) MAINTAINER ADDRESS + + The UDP-Lite patch was developed at + University of Aberdeen + Electronics Research Group + Department of Engineering + Fraser Noble Building + Aberdeen AB24 3UE; UK + The current maintainer is Gerrit Renker, . Initial + code was developed by William Stanislaus, . diff --git a/include/linux/in.h b/include/linux/in.h index 2619859f6e1..1912e7c0bc2 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -45,6 +45,7 @@ enum { IPPROTO_COMP = 108, /* Compression Header protocol */ IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ + IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ IPPROTO_RAW = 255, /* Raw IP packets */ IPPROTO_MAX diff --git a/include/linux/socket.h b/include/linux/socket.h index 36140909464..592b6667982 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -264,6 +264,7 @@ struct ucred { #define SOL_IPV6 41 #define SOL_ICMPV6 58 #define SOL_SCTP 132 +#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */ #define SOL_RAW 255 #define SOL_IPX 256 #define SOL_AX25 257 diff --git a/include/linux/udp.h b/include/linux/udp.h index 014b41d1e30..564f3b05010 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -38,6 +38,7 @@ struct udphdr { #include #include +#define UDP_HTABLE_SIZE 128 struct udp_sock { /* inet_sock has to be the first member */ @@ -50,12 +51,23 @@ struct udp_sock { * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ + /* + * Fields specific to UDP-Lite. + */ + __u16 pcslen; + __u16 pcrlen; +/* indicator bits used by pcflag: */ +#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ +#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ + __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ }; static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; } +#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3c266ad99a0..9390649bbfe 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -158,9 +158,13 @@ DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset); \ }) DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); -#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) -#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) -#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) +DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); +#define UDP6_INC_STATS_BH(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ + else SNMP_INC_STATS_BH(udp_stats_in6, field); } while(0) +#define UDP6_INC_STATS_USER(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ + else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0) int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); @@ -604,6 +608,8 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); +extern int udplite6_proc_init(void); +extern void udplite6_proc_exit(void); extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 61f724c1036..409da3a9a45 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -11,6 +11,7 @@ extern struct proto rawv6_prot; extern struct proto udpv6_prot; +extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; struct flowi; @@ -24,6 +25,7 @@ extern void ipv6_destopt_init(void); /* transport protocols */ extern void rawv6_init(void); extern void udpv6_init(void); +extern void udplitev6_init(void); extern void tcpv6_init(void); extern int udpv6_connect(struct sock *sk, diff --git a/include/net/udp.h b/include/net/udp.h index db0c05f6754..4f0626735ed 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -26,9 +26,28 @@ #include #include #include +#include +#include #include -#define UDP_HTABLE_SIZE 128 +/** + * struct udp_skb_cb - UDP(-Lite) private variables + * + * @header: private variables used by IPv4/IPv6 + * @cscov: checksum coverage length (UDP-Lite only) + * @partial_cov: if set indicates partial csum coverage + */ +struct udp_skb_cb { + union { + struct inet_skb_parm h4; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct inet6_skb_parm h6; +#endif + } header; + __u16 cscov; + __u8 partial_cov; +}; +#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; @@ -47,6 +66,62 @@ extern struct proto udp_prot; struct sk_buff; +/* + * Generic checksumming routines for UDP(-Lite) v4 and v6 + */ +static inline u16 __udp_lib_checksum_complete(struct sk_buff *skb) +{ + if (! UDP_SKB_CB(skb)->partial_cov) + return __skb_checksum_complete(skb); + return csum_fold(skb_checksum(skb, 0, UDP_SKB_CB(skb)->cscov, + skb->csum)); +} + +static __inline__ int udp_lib_checksum_complete(struct sk_buff *skb) +{ + return skb->ip_summed != CHECKSUM_UNNECESSARY && + __udp_lib_checksum_complete(skb); +} + +/** + * udp_csum_outgoing - compute UDPv4/v6 checksum over fragments + * @sk: socket we are writing to + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static inline u32 udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) +{ + u32 csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0); + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + return csum; +} + +/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ +static inline void udp_lib_hash(struct sock *sk) +{ + BUG(); +} + +static inline void udp_lib_unhash(struct sock *sk) +{ + write_lock_bh(&udp_hash_lock); + if (sk_del_node_init(sk)) { + inet_sk(sk)->num = 0; + sock_prot_dec_use(sk->sk_prot); + } + write_unlock_bh(&udp_hash_lock); +} + +static inline void udp_lib_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + + +/* net/ipv4/udp.c */ extern int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); @@ -61,21 +136,29 @@ extern unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); -#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field) -#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field) -#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field) +/* + * SNMP statistics for UDP and UDP-Lite + */ +#define UDP_INC_STATS_USER(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field); \ + else SNMP_INC_STATS_USER(udp_statistics, field); } while(0) +#define UDP_INC_STATS_BH(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field); \ + else SNMP_INC_STATS_BH(udp_statistics, field); } while(0) /* /proc */ struct udp_seq_afinfo { struct module *owner; char *name; sa_family_t family; + struct hlist_head *hashtable; int (*seq_show) (struct seq_file *m, void *v); struct file_operations *seq_fops; }; struct udp_iter_state { sa_family_t family; + struct hlist_head *hashtable; int bucket; struct seq_operations seq_ops; }; diff --git a/include/net/udplite.h b/include/net/udplite.h new file mode 100644 index 00000000000..1473b3e4904 --- /dev/null +++ b/include/net/udplite.h @@ -0,0 +1,149 @@ +/* + * Definitions for the UDP-Lite (RFC 3828) code. + */ +#ifndef _UDPLITE_H +#define _UDPLITE_H + +/* UDP-Lite socket options */ +#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ +#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ + +extern struct proto udplite_prot; +extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; + +/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ +DECLARE_SNMP_STAT(struct udp_mib, udplite_statistics); + +/* + * Checksum computation is all in software, hence simpler getfrag. + */ +static __inline__ int udplite_getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb) +{ + return memcpy_fromiovecend(to, (struct iovec *) from, offset, len); +} + +/* Designate sk as UDP-Lite socket */ +static inline int udplite_sk_init(struct sock *sk) +{ + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + +/* + * Checksumming routines + */ +static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) +{ + u16 cscov; + + /* In UDPv4 a zero checksum means that the transmitter generated no + * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets + * with a zero checksum field are illegal. */ + if (uh->check == 0) { + LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: zeroed checksum field\n"); + return 1; + } + + UDP_SKB_CB(skb)->partial_cov = 0; + cscov = ntohs(uh->len); + + if (cscov == 0) /* Indicates that full coverage is required. */ + cscov = skb->len; + else if (cscov < 8 || cscov > skb->len) { + /* + * Coverage length violates RFC 3828: log and discard silently. + */ + LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: bad csum coverage %d/%d\n", + cscov, skb->len); + return 1; + + } else if (cscov < skb->len) + UDP_SKB_CB(skb)->partial_cov = 1; + + UDP_SKB_CB(skb)->cscov = cscov; + + /* + * There is no known NIC manufacturer supporting UDP-Lite yet, + * hence ip_summed is always (re-)set to CHECKSUM_NONE. + */ + skb->ip_summed = CHECKSUM_NONE; + + return 0; +} + +static __inline__ int udplite4_csum_init(struct sk_buff *skb, struct udphdr *uh) +{ + int rc = udplite_checksum_init(skb, uh); + + if (!rc) + skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, + skb->nh.iph->daddr, + skb->len, IPPROTO_UDPLITE, 0); + return rc; +} + +static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh) +{ + int rc = udplite_checksum_init(skb, uh); + + if (!rc) + skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDPLITE, 0); + return rc; +} + +static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) +{ + int cscov = up->len; + + /* + * Sender has set `partial coverage' option on UDP-Lite socket + */ + if (up->pcflag & UDPLITE_SEND_CC) { + if (up->pcslen < up->len) { + /* up->pcslen == 0 means that full coverage is required, + * partial coverage only if 0 < up->pcslen < up->len */ + if (0 < up->pcslen) { + cscov = up->pcslen; + } + uh->len = htons(up->pcslen); + } + /* + * NOTE: Causes for the error case `up->pcslen > up->len': + * (i) Application error (will not be penalized). + * (ii) Payload too big for send buffer: data is split + * into several packets, each with its own header. + * In this case (e.g. last segment), coverage may + * exceed packet length. + * Since packets with coverage length > packet length are + * illegal, we fall back to the defaults here. + */ + } + return cscov; +} + +static inline u32 udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) +{ + u32 csum = 0; + int off, len, cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh); + + skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ + + skb_queue_walk(&sk->sk_write_queue, skb) { + off = skb->h.raw - skb->data; + len = skb->len - off; + + csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); + + if ((cscov -= len) <= 0) + break; + } + return csum; +} + +extern void udplite4_register(void); +extern int udplite_get_port(struct sock *sk, unsigned short snum, + int (*scmp)(const struct sock *, const struct sock *)); +#endif /* _UDPLITE_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 81c91e8a328..3878a88ff61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -468,6 +468,7 @@ __be16 xfrm_flowi_sport(struct flowi *fl) switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = fl->fl_ip_sport; break; @@ -493,6 +494,7 @@ __be16 xfrm_flowi_dport(struct flowi *fl) switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = fl->fl_ip_dport; break; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15645c51520..7a068626fee 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,8 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ + datagram.o raw.o udp.o udplite.o \ + arp.o icmp.o devinet.o af_inet.o igmp.o \ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4a81d54a756..8db39f7e3bf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include @@ -1223,10 +1224,13 @@ static int __init init_ipv4_mibs(void) tcp_statistics[1] = alloc_percpu(struct tcp_mib); udp_statistics[0] = alloc_percpu(struct udp_mib); udp_statistics[1] = alloc_percpu(struct udp_mib); + udplite_statistics[0] = alloc_percpu(struct udp_mib); + udplite_statistics[1] = alloc_percpu(struct udp_mib); if (! (net_statistics[0] && net_statistics[1] && ip_statistics[0] && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] - && udp_statistics[0] && udp_statistics[1])) + && udp_statistics[0] && udp_statistics[1] + && udplite_statistics[0] && udplite_statistics[1] ) ) return -ENOMEM; (void) tcp_mib_init(); @@ -1313,6 +1317,8 @@ static int __init inet_init(void) /* Setup TCP slab cache for open requests. */ tcp_init(); + /* Add UDP-Lite (RFC 3828) */ + udplite4_register(); /* * Set the ICMP layer up diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 7dc820df8bc..46eee64a11f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -171,11 +171,15 @@ static void dump_packet(const struct nf_loginfo *info, } break; } - case IPPROTO_UDP: { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph, *uh; - /* Max length: 10 "PROTO=UDP " */ - printk("PROTO=UDP "); + if (ih->protocol == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP " ); + else /* Max length: 14 "PROTO=UDPLITE " */ + printk("PROTO=UDPLITE "); if (ntohs(ih->frag_off) & IP_OFFSET) break; @@ -341,6 +345,7 @@ static void dump_packet(const struct nf_loginfo *info, /* IP: 40+46+6+11+127 = 230 */ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ /* ESP: 10+max(25)+15 = 50 */ /* AH: 9+max(25)+15 = 49 */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9c6cbe3d9fb..cd873da54cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, atomic_read(&ip_frag_mem)); @@ -304,6 +306,17 @@ static int snmp_seq_show(struct seq_file *seq, void *v) fold_field((void **) udp_statistics, snmp4_udp_list[i].entry)); + /* the UDP and UDP-Lite MIBs are the same */ + seq_puts(seq, "\nUdpLite:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + seq_printf(seq, " %s", snmp4_udp_list[i].name); + + seq_puts(seq, "\nUdpLite:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + seq_printf(seq, " %lu", + fold_field((void **) udplite_statistics, + snmp4_udp_list[i].entry) ); + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9e1bd374875..98ba7509617 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -92,22 +92,16 @@ #include #include #include -#include #include -#include -#include #include -#include #include #include #include -#include -#include #include #include -#include #include #include +#include "udp_impl.h" /* * Snmp MIB for the UDP layer @@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -static inline int udp_lport_inuse(u16 num) +static inline int __udp_lib_lport_inuse(__be16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) if (inet_sk(sk)->num == num) return 1; return 0; } /** - * udp_get_port - common port lookup for IPv4 and IPv6 + * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up + * @udptable: hash list table, must be of UDP_HTABLE_SIZE + * @port_rover: pointer to record of last unallocated port * @saddr_comp: AF-dependent comparison of bound local IP addresses */ -int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) +int __udp_lib_get_port(struct sock *sk, unsigned short snum, + struct hlist_head udptable[], int *port_rover, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2 ) ) { struct hlist_node *node; struct hlist_head *head; @@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, if (snum == 0) { int best_size_so_far, best, result, i; - if (udp_port_rover > sysctl_local_port_range[1] || - udp_port_rover < sysctl_local_port_range[0]) - udp_port_rover = sysctl_local_port_range[0]; + if (*port_rover > sysctl_local_port_range[1] || + *port_rover < sysctl_local_port_range[0]) + *port_rover = sysctl_local_port_range[0]; best_size_so_far = 32767; - best = result = udp_port_rover; + best = result = *port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) + if (! __udp_lib_lport_inuse(result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) goto fail; gotit: - udp_port_rover = snum = result; + *port_rover = snum = result; } else { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) if (inet_sk(sk2)->num == snum && @@ -195,12 +193,12 @@ gotit: (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_cmp)(sk, sk2) ) + (*saddr_comp)(sk, sk2) ) goto fail; } inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -210,7 +208,13 @@ fail: return error; } -static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +__inline__ int udp_get_port(struct sock *sk, unsigned short snum, + int (*scmp)(const struct sock *, const struct sock *)) +{ + return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); +} + +inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); } - -static void udp_v4_hash(struct sock *sk) -{ - BUG(); -} - -static void udp_v4_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_dec_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); -} - /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); int badness = -1; - sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { @@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, } } } - return result; -} - -static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) -{ - struct sock *sk; - - read_lock(&udp_hash_lock); - sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); - if (sk) - sock_hold(sk); + if (result) + sock_hold(result); read_unlock(&udp_hash_lock); - return sk; + return result; } static inline struct sock *udp_v4_mcast_next(struct sock *sk, @@ -340,7 +320,7 @@ found: * to find the appropriate port. */ -void udp_err(struct sk_buff *skb, u32 info) +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info) int harderr; int err; - sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, + skb->dev->ifindex, udptable ); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -405,6 +386,11 @@ out: sock_put(sk); } +__inline__ void udp_err(struct sk_buff *skb, u32 info) +{ + return __udp4_lib_err(skb, info, udp_hash); +} + /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -419,16 +405,56 @@ static void udp_flush_pending_frames(struct sock *sk) } } +/** + * udp4_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, int len ) +{ + unsigned int csum = 0, offset; + struct udphdr *uh = skb->h.uh; + + if (skb_queue_len(&sk->sk_write_queue) == 1) { + /* + * Only one fragment on the socket. + */ + skb->csum = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + } else { + /* + * HW-checksum won't work as there are two or more + * fragments on the socket so that all csums of sk_buffs + * should be together + */ + offset = skb->h.raw - skb->data; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + + skb->ip_summed = CHECKSUM_NONE; + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + + uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = -1; + } +} + /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) +int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) { struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; struct udphdr *uh; int err = 0; + u32 csum = 0; /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) @@ -443,52 +469,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->len = htons(up->len); uh->check = 0; - if (sk->sk_no_check == UDP_CSUM_NOXMIT) { + if (up->pcflag) /* UDP-Lite */ + csum = udplite_csum_outgoing(sk, skb); + + else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; goto send; - } - if (skb_queue_len(&sk->sk_write_queue) == 1) { - /* - * Only one fragment on the socket. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb->csum = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, 0); - } else { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, skb->csum); - if (uh->check == 0) - uh->check = -1; - } - } else { - unsigned int csum = 0; - /* - * HW-checksum won't work as there are two or more - * fragments on the socket so that all csums of sk_buffs - * should be together. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - int offset = (unsigned char *)uh - skb->data; - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - skb->ip_summed = CHECKSUM_NONE; - } else { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - } + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + goto send; + + } else /* `normal' UDP */ + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + sk->sk_protocol, csum ); + if (uh->check == 0) + uh->check = -1; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = -1; - } send: err = ip_push_pending_frames(sk); out: @@ -497,12 +499,6 @@ out: return err; } - -static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) -{ - return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); -} - int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -516,8 +512,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, __be32 daddr, faddr, saddr; __be16 dport; u8 tos; - int err; + int err, is_udplite = up->pcflag; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); if (len > 0xFFFF) return -EMSGSIZE; @@ -622,7 +619,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .daddr = faddr, .saddr = saddr, .tos = tos } }, - .proto = IPPROTO_UDP, + .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; @@ -668,8 +665,9 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); @@ -684,7 +682,7 @@ out: if (free) kfree(ipc.opt); if (!err) { - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return len; } /* @@ -695,7 +693,7 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -707,8 +705,8 @@ do_confirm: goto out; } -static int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct udp_sock *up = udp_sk(sk); int ret; @@ -795,29 +793,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return(0); } -static __inline__ int __udp_checksum_complete(struct sk_buff *skb) -{ - return __skb_checksum_complete(skb); -} - -static __inline__ int udp_checksum_complete(struct sk_buff *skb) -{ - return skb->ip_summed != CHECKSUM_UNNECESSARY && - __udp_checksum_complete(skb); -} - /* * This should be easy, if there is something there we * return it, otherwise we block. */ -static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - int copied, err; + int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); /* * Check any passed addresses @@ -839,15 +826,25 @@ try_again: msg->msg_flags |= MSG_TRUNC; } - if (skb->ip_summed==CHECKSUM_UNNECESSARY) { - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else if (msg->msg_flags&MSG_TRUNC) { - if (__udp_checksum_complete(skb)) + /* + * Decide whether to checksum and/or copy data. + * + * UDP: checksum may have been computed in HW, + * (re-)compute it if message is truncated. + * UDP-Lite: always needs to checksum, no HW support. + */ + copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); + + if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { + if (__udp_lib_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else { + copy_only = 1; + } + + if (copy_only) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -880,7 +877,7 @@ out: return err; csum_copy_err: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); skb_kill_datagram(sk, skb, flags); @@ -912,11 +909,6 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } -static void udp_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - /* return: * 1 if the the UDP system should process it * 0 if we should drop this packet @@ -1022,7 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; @@ -1030,10 +1022,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* * Charge it to the socket, dropping if the queue is full. */ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return -1; - } + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; nf_reset(skb); if (up->encap_type) { @@ -1057,31 +1047,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (ret < 0) { /* process the ESP packet */ ret = xfrm4_rcv_encap(skb, up->encap_type); - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return -ret; } /* FALLTHROUGH -- it's a UDP Packet */ } - if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { - if (__udp_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return -1; + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are + * disabled for the following two types of errors: these depend + * on the application settings, not on the functioning of the + * protocol stack as such. + * + * RFC 3828 here recommends (sec 3.3): "There should also be a + * way ... to ... at least let the receiving application block + * delivery of packets with coverage values less than a value + * provided by the application." + */ + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " + "%d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; } + /* The next case involves violating the min. coverage requested + * by the receiver. This is subtle: if receiver wants x and x is + * greater than the buffersize/MTU then receiver will complain + * that it wants x while sender emits packets of smaller size y. + * Therefore the above ...()->partial_cov statement is essential. + */ + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING + "UDPLITE: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { + if (__udp_lib_checksum_complete(skb)) + goto drop; skb->ip_summed = CHECKSUM_UNNECESSARY; } if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return -1; + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + goto drop; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return 0; + +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + kfree_skb(skb); + return -1; } /* @@ -1090,14 +1117,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, - __be32 saddr, __be32 daddr) +static int __udp4_lib_mcast_deliver(struct sk_buff *skb, + struct udphdr *uh, + __be32 saddr, __be32 daddr, + struct hlist_head udptable[]) { struct sock *sk; int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1131,65 +1160,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * Otherwise, csum completion requires chacksumming packet body, * including udp header and folding it to skb->csum. */ -static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) +static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) + if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len, IPPROTO_UDP, skb->csum )) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); + skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, + skb->nh.iph->daddr, + skb->len, IPPROTO_UDP, 0); /* Probably, we should checksum udp header (it should be in cache * in any case) and data in tiny packets (< rx copybreak). */ + + /* UDP = UDP-Lite with a non-partial checksum coverage */ + UDP_SKB_CB(skb)->partial_cov = 0; } /* * All we need to do is get the socket, and then do a checksum. */ -int udp_rcv(struct sk_buff *skb) +int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int is_udplite) { struct sock *sk; - struct udphdr *uh; + struct udphdr *uh = skb->h.uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = skb->nh.iph->saddr; __be32 daddr = skb->nh.iph->daddr; - int len = skb->len; /* - * Validate the packet and the UDP length. + * Validate the packet. */ if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto no_header; - - uh = skb->h.uh; + goto drop; /* No space for header. */ ulen = ntohs(uh->len); - - if (ulen > len || ulen < sizeof(*uh)) + if (ulen > skb->len) goto short_packet; - if (pskb_trim_rcsum(skb, ulen)) - goto short_packet; + if(! is_udplite ) { /* UDP validates ulen. */ + + if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + goto short_packet; - udp_checksum_init(skb, uh, ulen, saddr, daddr); + udp4_csum_init(skb, uh); + + } else { /* UDP-Lite validates cscov. */ + if (udplite4_csum_init(skb, uh)) + goto csum_error; + } if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return udp_v4_mcast_deliver(skb, uh, saddr, daddr); + return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); + sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, + skb->dev->ifindex, udptable ); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but - * it it wants the return to be -protocol, or 0 + * it wants the return to be -protocol, or 0 */ if (ret > 0) return -ret; @@ -1201,10 +1240,10 @@ int udp_rcv(struct sk_buff *skb) nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ - if (udp_checksum_complete(skb)) + if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS); + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1215,36 +1254,40 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + is_udplite? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), ulen, - len, + skb->len, NIPQUAD(daddr), ntohs(uh->dest)); -no_header: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return(0); + goto drop; csum_error: /* * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + is_udplite? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), NIPQUAD(daddr), ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return(0); } -static int udp_destroy_sock(struct sock *sk) +__inline__ int udp_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udp_hash, 0); +} + +int udp_destroy_sock(struct sock *sk) { lock_sock(sk); udp_flush_pending_frames(sk); @@ -1293,6 +1336,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, } break; + /* + * UDP-Lite's partial checksum coverage (RFC 3828). + */ + /* The sender sets actual checksum coverage length via this option. + * The case coverage > packet length is handled by send module. */ + case UDPLITE_SEND_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ + val = 8; + up->pcslen = val; + up->pcflag |= UDPLITE_SEND_CC; + break; + + /* The receiver specifies a minimum checksum coverage value. To make + * sense, this should be set to at least 8 (as done below). If zero is + * used, this again means full checksum coverage. */ + case UDPLITE_RECV_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Avoid silly minimal values. */ + val = 8; + up->pcrlen = val; + up->pcflag |= UDPLITE_RECV_CC; + break; + default: err = -ENOPROTOOPT; break; @@ -1301,21 +1370,21 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, return err; } -static int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return ip_setsockopt(sk, level, optname, optval, optlen); - return do_udp_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udp_setsockopt(sk, level, optname, optval, optlen); + return ip_setsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return compat_ip_setsockopt(sk, level, optname, optval, optlen); - return do_udp_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udp_setsockopt(sk, level, optname, optval, optlen); + return compat_ip_setsockopt(sk, level, optname, optval, optlen); } #endif @@ -1342,6 +1411,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + /* The following two cannot be changed on UDP sockets, the return is + * always 0 (which corresponds to the full checksum coverage of UDP). */ + case UDPLITE_SEND_CSCOV: + val = up->pcslen; + break; + + case UDPLITE_RECV_CSCOV: + val = up->pcrlen; + break; + default: return -ENOPROTOOPT; }; @@ -1353,21 +1432,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, return 0; } -static int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return ip_getsockopt(sk, level, optname, optval, optlen); - return do_udp_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udp_getsockopt(sk, level, optname, optval, optlen); + return ip_getsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udp_getsockopt(struct sock *sk, int level, int optname, +int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return compat_ip_getsockopt(sk, level, optname, optval, optlen); - return do_udp_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udp_getsockopt(sk, level, optname, optval, optlen); + return compat_ip_getsockopt(sk, level, optname, optval, optlen); } #endif /** @@ -1387,7 +1466,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - + int is_lite = IS_UDPLITE(sk); + /* Check for false positives due to checksum errors */ if ( (mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && @@ -1397,8 +1477,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL) { - if (udp_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + if (udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } else { @@ -1420,7 +1500,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, - .close = udp_close, + .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, @@ -1431,8 +1511,8 @@ struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_v4_hash, - .unhash = udp_v4_unhash, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), #ifdef CONFIG_COMPAT @@ -1451,7 +1531,7 @@ static struct sock *udp_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, &udp_hash[state->bucket]) { + sk_for_each(sk, node, state->hashtable + state->bucket) { if (sk->sk_family == state->family) goto found; } @@ -1472,7 +1552,7 @@ try_again: } while (sk && sk->sk_family != state->family); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { - sk = sk_head(&udp_hash[state->bucket]); + sk = sk_head(state->hashtable + state->bucket); goto try_again; } return sk; @@ -1522,6 +1602,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) if (!s) goto out; s->family = afinfo->family; + s->hashtable = afinfo->hashtable; s->seq_ops.start = udp_seq_start; s->seq_ops.next = udp_seq_next; s->seq_ops.show = afinfo->seq_show; @@ -1588,7 +1669,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) atomic_read(&sp->sk_refcnt), sp); } -static int udp4_seq_show(struct seq_file *seq, void *v) +int udp4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -1611,6 +1692,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { .owner = THIS_MODULE, .name = "udp", .family = AF_INET, + .hashtable = udp_hash, .seq_show = udp4_seq_show, .seq_fops = &udp4_seq_fops, }; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h new file mode 100644 index 00000000000..f6f4277ba6d --- /dev/null +++ b/net/ipv4/udp_impl.h @@ -0,0 +1,38 @@ +#ifndef _UDP4_IMPL_H +#define _UDP4_IMPL_H +#include +#include +#include +#include + +extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); + +extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, + struct hlist_head udptable[], int *port_rover, + int (*)(const struct sock*,const struct sock*)); +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); + + +extern int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); + +#ifdef CONFIG_COMPAT +extern int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +#endif +extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +extern int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags); +extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); +extern int udp_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int udp4_seq_show(struct seq_file *seq, void *v); +#endif +#endif /* _UDP4_IMPL_H */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c new file mode 100644 index 00000000000..561de6d8c73 --- /dev/null +++ b/net/ipv4/udplite.c @@ -0,0 +1,119 @@ +/* + * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). + * + * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ + * + * Authors: Gerrit Renker + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" +DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; + +struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +static int udplite_port_rover; + +__inline__ int udplite_get_port(struct sock *sk, unsigned short p, + int (*c)(const struct sock *, const struct sock *)) +{ + return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); +} + +static __inline__ int udplite_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +__inline__ int udplite_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udplite_hash, 1); +} + +__inline__ void udplite_err(struct sk_buff *skb, u32 info) +{ + return __udp4_lib_err(skb, info, udplite_hash); +} + +static struct net_protocol udplite_protocol = { + .handler = udplite_rcv, + .err_handler = udplite_err, + .no_policy = 1, +}; + +struct proto udplite_prot = { + .name = "UDP-Lite", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v4_get_port, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif +}; + +static struct inet_protosw udplite4_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplite_prot, + .ops = &inet_dgram_ops, + .capability = -1, + .no_check = 0, /* must checksum (RFC 3828) */ + .flags = INET_PROTOSW_PERMANENT, +}; + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite4_seq_fops; +static struct udp_seq_afinfo udplite4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite", + .family = AF_INET, + .hashtable = udplite_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udplite4_seq_fops, +}; +#endif + +void __init udplite4_register(void) +{ + if (proto_register(&udplite_prot, 1)) + goto out_register_err; + + if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) + goto out_unregister_proto; + + inet_register_protosw(&udplite4_protosw); + +#ifdef CONFIG_PROC_FS + if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ + printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); +#endif + return; + +out_unregister_proto: + proto_unregister(&udplite_prot); +out_register_err: + printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__); +} + +EXPORT_SYMBOL(udplite_hash); +EXPORT_SYMBOL(udplite_prot); +EXPORT_SYMBOL(udplite_get_port); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index da766234607..d4107bb701b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -199,6 +199,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index addcc011bc0..8bacda109b7 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,8 +5,8 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ - protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 92bfccf62cb..1eb1c7f261d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -737,8 +738,13 @@ static int __init init_ipv6_mibs(void) if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), __alignof__(struct udp_mib)) < 0) goto err_udp_mib; + if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib), + __alignof__(struct udp_mib)) < 0) + goto err_udplite_mib; return 0; +err_udplite_mib: + snmp6_mib_free((void **)udp_stats_in6); err_udp_mib: snmp6_mib_free((void **)icmpv6_statistics); err_icmp_mib: @@ -753,6 +759,7 @@ static void cleanup_ipv6_mibs(void) snmp6_mib_free((void **)ipv6_statistics); snmp6_mib_free((void **)icmpv6_statistics); snmp6_mib_free((void **)udp_stats_in6); + snmp6_mib_free((void **)udplite_stats_in6); } static int __init inet6_init(void) @@ -780,10 +787,14 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; - err = proto_register(&rawv6_prot, 1); + err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; + err = proto_register(&rawv6_prot, 1); + if (err) + goto out_unregister_udplite_proto; + /* Register the socket-side information for inet6_create. */ for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) @@ -837,6 +848,8 @@ static int __init inet6_init(void) goto proc_tcp6_fail; if (udp6_proc_init()) goto proc_udp6_fail; + if (udplite6_proc_init()) + goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; @@ -862,6 +875,7 @@ static int __init inet6_init(void) /* Init v6 transport protocols. */ udpv6_init(); + udplitev6_init(); tcpv6_init(); ipv6_packet_init(); @@ -879,6 +893,8 @@ proc_if6_fail: proc_anycast6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: + udplite6_proc_exit(); +proc_udplite6_fail: udp6_proc_exit(); proc_udp6_fail: tcp6_proc_exit(); @@ -902,6 +918,8 @@ out_unregister_sock: sock_unregister(PF_INET6); out_unregister_raw_proto: proto_unregister(&rawv6_prot); +out_unregister_udplite_proto: + proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: @@ -919,6 +937,7 @@ static void __exit inet6_exit(void) ac6_proc_exit(); ipv6_misc_proc_exit(); udp6_proc_exit(); + udplite6_proc_exit(); tcp6_proc_exit(); raw6_proc_exit(); #endif diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index de6b91981b3..1eafcfc95e8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && + sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) break; @@ -276,11 +278,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct proto *prot = &udp_prot; + + if (sk->sk_protocol == IPPROTO_UDPLITE) + prot = &udplite_prot; local_bh_disable(); sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(&udp_prot); + sock_prot_inc_use(prot); local_bh_enable(); - sk->sk_prot = &udp_prot; + sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } @@ -813,6 +819,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && + sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cf537d3018..3cb6bb79cc0 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -270,11 +270,15 @@ static void dump_packet(const struct nf_loginfo *info, } break; } - case IPPROTO_UDP: { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph, *uh; - /* Max length: 10 "PROTO=UDP " */ - printk("PROTO=UDP "); + if (currenthdr == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP " ); + else /* Max length: 14 "PROTO=UDPLITE " */ + printk("PROTO=UDPLITE "); if (fragment) break; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 4158d386b0a..35249d8487b 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -49,6 +49,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) fold_prot_inuse(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", fold_prot_inuse(&udpv6_prot)); + seq_printf(seq, "UDPLITE6: inuse %d\n", + fold_prot_inuse(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -133,6 +135,14 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; +static struct snmp_mib snmp6_udplite6_list[] = { + SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), + SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), + SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), + SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_SENTINEL +}; + static unsigned long fold_field(void *mib[], int offt) { @@ -167,6 +177,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); + snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); } return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 886300d13a5..5a64027bf2f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -38,26 +38,18 @@ #include #include -#include -#include - -#include #include #include #include #include -#include -#include -#include #include -#include #include - #include #include #include #include +#include "udp_impl.h" DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; @@ -66,23 +58,9 @@ static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } -static void udp_v6_hash(struct sock *sk) -{ - BUG(); -} - -static void udp_v6_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_dec_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); -} - -static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, int dif) +static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -90,7 +68,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && sk->sk_family == PF_INET6) { @@ -131,21 +109,12 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, return result; } -/* - * - */ - -static void udpv6_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - /* * This should be easy, if there is something there we * return it, otherwise we block. */ -static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { @@ -153,7 +122,7 @@ static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; size_t copied; - int err; + int err, copy_only, is_udplite = IS_UDPLITE(sk); if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -172,15 +141,21 @@ try_again: msg->msg_flags |= MSG_TRUNC; } - if (skb->ip_summed==CHECKSUM_UNNECESSARY) { - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else if (msg->msg_flags&MSG_TRUNC) { - if (__skb_checksum_complete(skb)) + /* + * Decide whether to checksum and/or copy data. + */ + copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); + + if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { + if (__udp_lib_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else { + copy_only = 1; + } + + if (copy_only) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; @@ -231,14 +206,15 @@ csum_copy_err: skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { - UDP6_INC_STATS_USER(UDP_MIB_INERRORS); + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); return -EAGAIN; } goto try_again; } -static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) +void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info, + struct hlist_head udptable[] ) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; @@ -248,8 +224,8 @@ static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; - sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, inet6_iif(skb)); - + sk = __udp6_lib_lookup(daddr, uh->dest, + saddr, uh->source, inet6_iif(skb), udptable); if (sk == NULL) return; @@ -270,31 +246,55 @@ out: sock_put(sk); } -static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static __inline__ void udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, int type, + int code, int offset, __u32 info ) +{ + return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); +} + +int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { + struct udp_sock *up = udp_sk(sk); int rc; - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return -1; - } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; - if (skb_checksum_complete(skb)) { - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return 0; + /* + * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). + */ + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" + " %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " + "too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } } + if (udp_lib_checksum_complete(skb)) + goto drop; + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return 0; + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + goto drop; } - UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return 0; +drop: + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + kfree_skb(skb); + return -1; } static struct sock *udp_v6_mcast_next(struct sock *sk, @@ -338,15 +338,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static void udpv6_mcast_deliver(struct udphdr *uh, - struct in6_addr *saddr, struct in6_addr *daddr, - struct sk_buff *skb) +static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, + struct in6_addr *daddr, struct hlist_head udptable[]) { struct sock *sk, *sk2; + const struct udphdr *uh = skb->h.uh; int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -364,9 +364,34 @@ static void udpv6_mcast_deliver(struct udphdr *uh, udpv6_queue_rcv_skb(sk, skb); out: read_unlock(&udp_hash_lock); + return 0; } -static int udpv6_rcv(struct sk_buff **pskb) +static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) + +{ + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDP, skb->csum )) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDP, 0); + + return (UDP_SKB_CB(skb)->partial_cov = 0); +} + +int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], + int is_udplite) { struct sk_buff *skb = *pskb; struct sock *sk; @@ -383,44 +408,39 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; - /* Check for jumbo payload */ - if (ulen == 0) - ulen = skb->len; + if(! is_udplite ) { /* UDP validates ulen. */ - if (ulen > skb->len || ulen < sizeof(*uh)) - goto short_packet; + /* Check for jumbo payload */ + if (ulen == 0) + ulen = skb->len; - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - goto discard; - } + if (ulen < sizeof(*uh)) + goto short_packet; - if (ulen < skb->len) { - if (pskb_trim_rcsum(skb, ulen)) - goto discard; - saddr = &skb->nh.ipv6h->saddr; - daddr = &skb->nh.ipv6h->daddr; - uh = skb->h.uh; - } + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) + goto short_packet; + saddr = &skb->nh.ipv6h->saddr; + daddr = &skb->nh.ipv6h->daddr; + uh = skb->h.uh; + } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (udp6_csum_init(skb, uh)) + goto discard; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); + } else { /* UDP-Lite validates cscov. */ + if (udplite6_csum_init(skb, uh)) + goto discard; + } /* * Multicast receive code */ - if (ipv6_addr_is_multicast(daddr)) { - udpv6_mcast_deliver(uh, saddr, daddr, skb); - return 0; - } + if (ipv6_addr_is_multicast(daddr)) + return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); /* Unicast */ @@ -428,15 +448,16 @@ static int udpv6_rcv(struct sk_buff **pskb) * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, inet6_iif(skb)); + sk = __udp6_lib_lookup(saddr, uh->source, + daddr, uh->dest, inet6_iif(skb), udptable); if (sk == NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - if (skb_checksum_complete(skb)) + if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -451,14 +472,20 @@ static int udpv6_rcv(struct sk_buff **pskb) return(0); short_packet: - if (net_ratelimit()) - printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len); + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + is_udplite? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return(0); } + +static __inline__ int udpv6_rcv(struct sk_buff **pskb) +{ + return __udp6_lib_rcv(pskb, udp_hash, 0); +} + /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -484,6 +511,7 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; + u32 csum = 0; /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) @@ -498,35 +526,17 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->len = htons(up->len); uh->check = 0; - if (sk->sk_no_check == UDP_CSUM_NOXMIT) { - skb->ip_summed = CHECKSUM_NONE; - goto send; - } - - if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - uh->check = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - up->len, fl->proto, skb->csum); - } else { - u32 tmp_csum = 0; - - skb_queue_walk(&sk->sk_write_queue, skb) { - tmp_csum = csum_add(tmp_csum, skb->csum); - } - tmp_csum = csum_partial((char *)uh, - sizeof(struct udphdr), tmp_csum); - tmp_csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - up->len, fl->proto, tmp_csum); - uh->check = tmp_csum; + if (up->pcflag) + csum = udplite_csum_outgoing(sk, skb); + else + csum = udp_csum_outgoing(sk, skb); - } + /* add protocol-dependent pseudo-header */ + uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, + up->len, fl->proto, csum ); if (uh->check == 0) uh->check = -1; -send: err = ip6_push_pending_frames(sk); out: up->len = 0; @@ -534,7 +544,7 @@ out: return err; } -static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; @@ -554,6 +564,8 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; + int is_udplite = up->pcflag; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); /* destination address check */ if (sin6) { @@ -694,7 +706,7 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = IPPROTO_UDP; + fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, daddr); if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -761,7 +773,8 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); @@ -793,7 +806,7 @@ do_append_data: out: fl6_sock_release(flowlabel); if (!err) { - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return len; } /* @@ -804,7 +817,7 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -816,7 +829,7 @@ do_confirm: goto out; } -static int udpv6_destroy_sock(struct sock *sk) +int udpv6_destroy_sock(struct sock *sk) { lock_sock(sk); udp_v6_flush_pending_frames(sk); @@ -854,7 +867,6 @@ static int do_udpv6_setsockopt(struct sock *sk, int level, int optname, release_sock(sk); } break; - case UDP_ENCAP: switch (val) { case 0: @@ -866,6 +878,24 @@ static int do_udpv6_setsockopt(struct sock *sk, int level, int optname, } break; + case UDPLITE_SEND_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ + val = 8; + up->pcslen = val; + up->pcflag |= UDPLITE_SEND_CC; + break; + + case UDPLITE_RECV_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Avoid silly minimal values. */ + val = 8; + up->pcrlen = val; + up->pcflag |= UDPLITE_RECV_CC; + break; + default: err = -ENOPROTOOPT; break; @@ -874,22 +904,21 @@ static int do_udpv6_setsockopt(struct sock *sk, int level, int optname, return err; } -static int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return ipv6_setsockopt(sk, level, optname, optval, optlen); - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + return ipv6_setsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return compat_ipv6_setsockopt(sk, level, optname, - optval, optlen); - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); } #endif @@ -916,6 +945,14 @@ static int do_udpv6_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + case UDPLITE_SEND_CSCOV: + val = up->pcslen; + break; + + case UDPLITE_RECV_CSCOV: + val = up->pcrlen; + break; + default: return -ENOPROTOOPT; }; @@ -927,22 +964,21 @@ static int do_udpv6_getsockopt(struct sock *sk, int level, int optname, return 0; } -static int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return ipv6_getsockopt(sk, level, optname, optval, optlen); - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + return ipv6_getsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return compat_ipv6_getsockopt(sk, level, optname, - optval, optlen); - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } #endif @@ -983,7 +1019,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket atomic_read(&sp->sk_refcnt), sp); } -static int udp6_seq_show(struct seq_file *seq, void *v) +int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, @@ -1002,6 +1038,7 @@ static struct udp_seq_afinfo udp6_seq_afinfo = { .owner = THIS_MODULE, .name = "udp6", .family = AF_INET6, + .hashtable = udp_hash, .seq_show = udp6_seq_show, .seq_fops = &udp6_seq_fops, }; @@ -1021,7 +1058,7 @@ void udp6_proc_exit(void) { struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, - .close = udpv6_close, + .close = udp_lib_close, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, @@ -1031,8 +1068,8 @@ struct proto udpv6_prot = { .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .backlog_rcv = udpv6_queue_rcv_skb, - .hash = udp_v6_hash, - .unhash = udp_v6_unhash, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), #ifdef CONFIG_COMPAT diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h new file mode 100644 index 00000000000..ec987889912 --- /dev/null +++ b/net/ipv6/udp_impl.h @@ -0,0 +1,34 @@ +#ifndef _UDP6_IMPL_H +#define _UDP6_IMPL_H +#include +#include +#include +#include +#include + +extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); +extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, + int , int , int , __be32 , struct hlist_head []); + +extern int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +extern int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +#ifdef CONFIG_COMPAT +extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +#endif +extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len); +extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len); +extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); +extern int udpv6_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int udp6_seq_show(struct seq_file *seq, void *v); +#endif +#endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c new file mode 100644 index 00000000000..e0ec5e63004 --- /dev/null +++ b/net/ipv6/udplite.c @@ -0,0 +1,105 @@ +/* + * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. + * See also net/ipv4/udplite.c + * + * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ + * + * Authors: Gerrit Renker + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" + +DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; + +static __inline__ int udplitev6_rcv(struct sk_buff **pskb) +{ + return __udp6_lib_rcv(pskb, udplite_hash, 1); +} + +static __inline__ void udplitev6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, + int type, int code, int offset, __u32 info) +{ + return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); +} + +static struct inet6_protocol udplitev6_protocol = { + .handler = udplitev6_rcv, + .err_handler = udplitev6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static __inline__ int udplite_v6_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); +} + +struct proto udplitev6_prot = { + .name = "UDPLITEv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .backlog_rcv = udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v6_get_port, + .obj_size = sizeof(struct udp6_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, +#endif +}; + +static struct inet_protosw udplite6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplitev6_prot, + .ops = &inet6_dgram_ops, + .capability = -1, + .no_check = 0, + .flags = INET_PROTOSW_PERMANENT, +}; + +void __init udplitev6_init(void) +{ + if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) + printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__); + + inet6_register_protosw(&udplite6_protosw); +} + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite6_seq_fops; +static struct udp_seq_afinfo udplite6_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite6", + .family = AF_INET6, + .hashtable = udplite_hash, + .seq_show = udp6_seq_show, + .seq_fops = &udplite6_seq_fops, +}; + +int __init udplite6_proc_init(void) +{ + return udp_proc_register(&udplite6_seq_afinfo); +} + +void udplite6_proc_exit(void) +{ + udp_proc_unregister(&udplite6_seq_afinfo); +} +#endif diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2fba1f0739a..8dffd4daae9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -274,6 +274,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) break; case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index b4293058c6f..1602086c7fd 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -1,5 +1,5 @@ -/* Kernel module to match one of a list of TCP/UDP/SCTP/DCCP ports: ports are in - the same place so we can treat them as equal. */ +/* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports: + ports are in the same place so we can treat them as equal. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team @@ -162,6 +162,7 @@ check(u_int16_t proto, { /* Must specify supported protocol, no unknown flags or bad count */ return (proto == IPPROTO_TCP || proto == IPPROTO_UDP + || proto == IPPROTO_UDPLITE || proto == IPPROTO_SCTP || proto == IPPROTO_DCCP) && !(ip_invflags & XT_INV_PROTO) && (match_flags == XT_MULTIPORT_SOURCE diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index e76a68e0bc6..46414b562a1 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -10,7 +10,7 @@ #include #include -MODULE_DESCRIPTION("x_tables match for TCP and UDP, supports IPv4 and IPv6"); +MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xt_tcp"); MODULE_ALIAS("xt_udp"); @@ -234,6 +234,24 @@ static struct xt_match xt_tcpudp_match[] = { .proto = IPPROTO_UDP, .me = THIS_MODULE, }, + { + .name = "udplite", + .family = AF_INET, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDPLITE, + .me = THIS_MODULE, + }, + { + .name = "udplite", + .family = AF_INET6, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDPLITE, + .me = THIS_MODULE, + }, }; static int __init xt_tcpudp_init(void) -- cgit v1.2.3 From 6b11687ef003ed595033da89643c8995676f929d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Nov 2006 21:28:24 -0800 Subject: [NET]: Annotate csum_tcpudp_magic() callers in net/* Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- net/ipv4/udp.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 363960872de..826aaecdb99 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -807,9 +807,9 @@ static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) /* * Calculate(/check) TCP checksum */ -static inline u16 tcp_v4_check(struct tcphdr *th, int len, - unsigned long saddr, unsigned long daddr, - unsigned long base) +static inline __sum16 tcp_v4_check(struct tcphdr *th, int len, + __be32 saddr, __be32 daddr, + __wsum base) { return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 98ba7509617..f9c4ed7207b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -414,8 +414,9 @@ static void udp_flush_pending_frames(struct sock *sk) static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, int len ) { - unsigned int csum = 0, offset; + unsigned int offset; struct udphdr *uh = skb->h.uh; + __wsum csum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { /* -- cgit v1.2.3 From f6ab028804bdc580fe0915494dbf31f5ea473ca7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Nov 2006 02:36:50 -0800 Subject: [NET]: Make mangling a checksum (0 -> 0xffff on the wire) explicit. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/checksum.h | 1 + net/ipv4/ipvs/ip_vs_proto_udp.c | 6 +++--- net/ipv4/netfilter/ip_nat_helper.c | 2 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 2 +- net/ipv4/udp.c | 4 ++-- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/checksum.h b/include/net/checksum.h index 2b3c8dc6c11..124246172a8 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -92,4 +92,5 @@ static inline __wsum csum_unfold(__sum16 n) return (__force __wsum)n; } +#define CSUM_MANGLED_0 ((__force __sum16)0xffff) #endif diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 261581e6a81..3647397e416 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -125,7 +125,7 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, ip_vs_check_diff(oldport ^ htons(0xFFFF), newport, uhdr->check)); if (!uhdr->check) - uhdr->check = -1; + uhdr->check = CSUM_MANGLED_0; } static int @@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = -1; + udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); @@ -228,7 +228,7 @@ udp_dnat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = -1; + udph->check = CSUM_MANGLED_0; (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; } return 1; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 3bf85848055..3e7fd64c216 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -264,7 +264,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, csum_partial((char *)udph, datalen, 0)); if (!udph->check) - udph->check = -1; + udph->check = CSUM_MANGLED_0; } else udph->check = nf_proto_csum_update(*pskb, htons(oldlen) ^ htons(0xFFFF), diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 4bbec7730d1..82f8a6ab07e 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -121,7 +121,7 @@ udp_manip_pkt(struct sk_buff **pskb, *portptr ^ htons(0xFFFF), newport, hdr->check, 0); if (!hdr->check) - hdr->check = -1; + hdr->check = CSUM_MANGLED_0; } *portptr = newport; return 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f9c4ed7207b..dc19ba1e73b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -441,7 +441,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) - uh->check = -1; + uh->check = CSUM_MANGLED_0; } } @@ -490,7 +490,7 @@ int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, sk->sk_protocol, csum ); if (uh->check == 0) - uh->check = -1; + uh->check = CSUM_MANGLED_0; send: err = ip_push_pending_frames(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 9b21d67ca48..c2e629d6aea 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -536,8 +536,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, &fl->fl6_dst, total_len, fl->proto, tmp_csum); - if (tmp_csum == 0 && fl->proto == IPPROTO_UDP) - tmp_csum = -1; + if (csum == 0 && fl->proto == IPPROTO_UDP) + csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) BUG(); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0d22008d522..bb45b9b7cbd 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -536,7 +536,7 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, up->len, fl->proto, csum ); if (uh->check == 0) - uh->check = -1; + uh->check = CSUM_MANGLED_0; err = ip6_push_pending_frames(sk); out: -- cgit v1.2.3 From 078250d68d883495f31d0e6ce8321292f2dc1cfa Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 15 Nov 2006 00:44:49 -0800 Subject: [NET/IPv4]: Make udp_push_pending_frames static udp_push_pending_frames is only referenced within net/ipv4/udp.c and hence can remain static. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc19ba1e73b..7eb76fbf1b4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -448,7 +448,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, /* * Push out all pending data as one UDP datagram. Socket is locked. */ -int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) +static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) { struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; -- cgit v1.2.3 From 8e5200f54062b8af0ed1d186ea0f113854786d89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Nov 2006 18:06:37 -0800 Subject: [NET]: Fix assorted misannotations (from md5 and udplite merges). Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- include/net/udplite.h | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 2 +- net/ipv6/udplite.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index aa7989c5379..c99774f15eb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1154,7 +1154,7 @@ extern int tcp_v4_md5_do_add(struct sock *sk, u8 newkeylen); extern int tcp_v4_md5_do_del(struct sock *sk, - u32 addr); + __be32 addr); extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void); extern void tcp_free_md5sig_pool(void); diff --git a/include/net/udp.h b/include/net/udp.h index c5ccd9a3387..eac69ff0582 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -77,7 +77,7 @@ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) skb->csum)); } -static inline __sum16 udp_lib_checksum_complete(struct sk_buff *skb) +static inline int udp_lib_checksum_complete(struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __udp_lib_checksum_complete(skb); diff --git a/include/net/udplite.h b/include/net/udplite.h index 3abaab7b78c..67ac5142430 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -90,9 +90,9 @@ static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh) int rc = udplite_checksum_init(skb, uh); if (!rc) - skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, - skb->len, IPPROTO_UDPLITE, 0); + skb->len, IPPROTO_UDPLITE, 0)); return rc; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7d5522092e..e9d467124c4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1020,7 +1020,7 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, #ifdef CONFIG_TCP_MD5SIG_DEBUG int i; #endif - __u16 old_checksum; + __sum16 old_checksum; struct tcp_md5sig_pool *hp; struct tcp4_pseudohdr *bp; struct hash_desc *desc; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7eb76fbf1b4..28e4cf662ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,7 +114,7 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -static inline int __udp_lib_lport_inuse(__be16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; @@ -455,7 +455,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) struct sk_buff *skb; struct udphdr *uh; int err = 0; - u32 csum = 0; + __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d2170da77e5..0adb337c4b7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -739,7 +739,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct scatterlist sg[4]; __u16 data_len; int block = 0; - __u16 cksum; + __sum16 cksum; struct tcp_md5sig_pool *hp; struct tcp6_pseudohdr *bp; struct hash_desc *desc; @@ -1032,7 +1032,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG if (key) { - u32 *opt = (u32*)(t1 + 1); + __be32 *opt = (__be32*)(t1 + 1); opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index efa8950ddd3..b3ea8af50a9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -248,7 +248,7 @@ out: static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, - int code, int offset, __u32 info ) + int code, int offset, __be32 info ) { return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index e0ec5e63004..d4cafacc235 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -24,7 +24,7 @@ static __inline__ int udplitev6_rcv(struct sk_buff **pskb) static __inline__ void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); } -- cgit v1.2.3 From ff1dcadb1b55dbf471c5ed109dbbdf06bd19ef3b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Nov 2006 18:07:29 -0800 Subject: [NET]: Split skb->csum ... into anonymous union of __wsum and __u32 (csum and csum_offset resp.) Signed-off-by: Al Viro Signed-off-by: David S. Miller --- drivers/net/cassini.c | 2 +- drivers/net/e1000/e1000_main.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/myri10ge/myri10ge.c | 2 +- drivers/net/sk98lin/skge.c | 4 ++-- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 2 +- drivers/net/sungem.c | 2 +- drivers/net/sunhme.c | 2 +- include/linux/skbuff.h | 5 ++++- net/core/dev.c | 4 ++-- net/core/skbuff.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 15 files changed, 22 insertions(+), 19 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 521c5b71023..fd2cc13f7d9 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2825,7 +2825,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); - csum_stuff_off = (u64) ((skb->h.raw + skb->csum) - skb->data); + csum_stuff_off = csum_start_off + skb->csum_offset; ctrl = TX_DESC_CSUM_EN | CAS_BASE(TX_DESC_CSUM_START, csum_start_off) | diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 7a0828869ec..32dde0adb68 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2826,7 +2826,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc = E1000_CONTEXT_DESC(*tx_ring, i); context_desc->upper_setup.tcp_fields.tucss = css; - context_desc->upper_setup.tcp_fields.tucso = css + skb->csum; + context_desc->upper_setup.tcp_fields.tucso = css + skb->csum_offset; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index e09f575a3a3..7b127212e62 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1249,7 +1249,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb) if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { struct ixgb_buffer *buffer_info; css = skb->h.raw - skb->data; - cso = (skb->h.raw + skb->csum) - skb->data; + cso = css + skb->csum_offset; i = adapter->tx_ring.next_to_use; context_desc = IXGB_CONTEXT_DESC(adapter->tx_ring, i); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 806081b5973..36350e6db1c 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1955,7 +1955,7 @@ again: flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { cksum_offset = (skb->h.raw - skb->data); - pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data; + pseudo_hdr_offset = cksum_offset + skb->csum_offset; /* If the headers are excessively large, then we must * fall back to a software checksum */ if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index a5d41ebc9fb..12cbfd190dd 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1562,7 +1562,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; - u16 offset = hdrlen + pMessage->csum; + u16 offset = hdrlen + pMessage->csum_offset; if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) && (pAC->GIni.GIChipRev == 0) && @@ -1681,7 +1681,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ */ if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; - u16 offset = hdrlen + pMessage->csum; + u16 offset = hdrlen + pMessage->csum_offset; Control = BMU_STFWD; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 27b537c8d5e..5513907e839 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2565,7 +2565,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) td->csum_offs = 0; td->csum_start = offset; - td->csum_write = offset + skb->csum; + td->csum_write = offset + skb->csum_offset; } else control = BMU_CHECK; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 0ef1848b976..842abd9396c 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1350,7 +1350,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) u32 tcpsum; tcpsum = offset << 16; /* sum start */ - tcpsum |= offset + skb->csum; /* sum write */ + tcpsum |= offset + skb->csum_offset; /* sum write */ ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; if (skb->nh.iph->protocol == IPPROTO_UDP) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 253e96e7ad2..334c6cfd659 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1030,7 +1030,7 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); - csum_stuff_off = (u64) ((skb->h.raw + skb->csum) - skb->data); + csum_stuff_off = csum_start_off + skb->csum_offset; ctrl = (TXDCTRL_CENAB | (csum_start_off << 15) | diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 9d7cd130c19..ec432ea879f 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2272,7 +2272,7 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 csum_start_off, csum_stuff_off; csum_start_off = (u32) (skb->h.raw - skb->data); - csum_stuff_off = (u32) ((skb->h.raw + skb->csum) - skb->data); + csum_stuff_off = csum_start_off + skb->csum_offset; tx_flags = (TXFLAG_OWN | TXFLAG_CSENABLE | ((csum_start_off << 14) & TXFLAG_CSBUFBEGIN) | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fcab543d79a..14ec16d2d9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -274,7 +274,10 @@ struct sk_buff { unsigned int len, data_len, mac_len; - __wsum csum; + union { + __wsum csum; + __u32 csum_offset; + }; __u32 priority; __u8 local_df:1, cloned:1, diff --git a/net/core/dev.c b/net/core/dev.c index 1a36b17f4b5..59d058a3b50 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1191,9 +1191,9 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb->tail - skb->h.raw; BUG_ON(offset <= 0); - BUG_ON(skb->csum + 2 > offset); + BUG_ON(skb->csum_offset + 2 > offset); - *(__sum16*)(skb->h.raw + skb->csum) = csum_fold(csum); + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 07c25d60192..a90bc439488 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1414,7 +1414,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) skb->len - csstart, 0); if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum; + long csstuff = csstart + skb->csum_offset; *((__sum16 *)(to + csstuff)) = csum_fold(csum); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9d467124c4..4913f25e5ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -504,7 +504,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, csum_partial((char *)th, @@ -526,7 +526,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28e4cf662ce..1807a30694d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -422,7 +422,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, /* * Only one fragment on the socket. */ - skb->csum = offsetof(struct udphdr, check); + skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0adb337c4b7..517c50024bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -948,7 +948,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, csum_partial((char *)th, th->doff<<2, @@ -970,7 +970,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, IPPROTO_TCP, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; } -- cgit v1.2.3 From 4c0a6cb0db19de411c4bf7fcdc79d4c7c4ccafb1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 09:29:59 -0800 Subject: [UDP(-Lite)]: consolidate v4 and v6 get|setsockopt code This patch consolidates set/getsockopt code between UDP(-Lite) v4 and 6. The justification is that UDP(-Lite) is a transport-layer protocol and therefore the socket option code (at least in theory) should be AF-independent. Furthermore, there is the following code reduplication: * do_udp{,v6}_getsockopt is 100% identical between v4 and v6 * do_udp{,v6}_setsockopt is identical up to the following differerence --v4 in contrast to v4 additionally allows the experimental encapsulation types UDP_ENCAP_ESPINUDP and UDP_ENCAP_ESPINUDP_NON_IKE --the remainder is identical between v4 and v6 I believe that this difference is of little relevance. The advantages in not duplicating twice almost completely identical code. The patch further simplifies the interface of udp{,v6}_push_pending_frames, since for the second argument (struct udp_sock *up) it always holds that up = udp_sk(sk); where sk is the first function argument. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/udp.h | 5 +++ net/ipv4/udp.c | 30 ++++++++------ net/ipv6/udp.c | 118 +++++------------------------------------------------- 3 files changed, 32 insertions(+), 121 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/udp.h b/include/net/udp.h index eac69ff0582..1548d68d45d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -134,6 +134,11 @@ extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int udp_disconnect(struct sock *sk, int flags); extern unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); +extern int udp_lib_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen, + int (*push_pending_frames)(struct sock *)); DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1807a30694d..035915fc9ed 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -448,8 +448,9 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) +static int udp_push_pending_frames(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; @@ -673,7 +674,7 @@ do_append_data: if (err) udp_flush_pending_frames(sk); else if (!corkreq) - err = udp_push_pending_frames(sk, up); + err = udp_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; release_sock(sk); @@ -746,7 +747,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, up->len += size; if (!(up->corkflag || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk, up); + ret = udp_push_pending_frames(sk); if (!ret) ret = size; out: @@ -1299,8 +1300,9 @@ int udp_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -static int do_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udp_lib_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen, + int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); int val; @@ -1319,7 +1321,7 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, } else { up->corkflag = 0; lock_sock(sk); - udp_push_pending_frames(sk, up); + (*push_pending_frames)(sk); release_sock(sk); } break; @@ -1375,7 +1377,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udp_setsockopt(sk, level, optname, optval, optlen); + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); return ip_setsockopt(sk, level, optname, optval, optlen); } @@ -1384,13 +1387,14 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udp_setsockopt(sk, level, optname, optval, optlen); + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); return compat_ip_setsockopt(sk, level, optname, optval, optlen); } #endif -static int do_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udp_lib_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { struct udp_sock *up = udp_sk(sk); int val, len; @@ -1437,7 +1441,7 @@ int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udp_getsockopt(sk, level, optname, optval, optlen); + return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ip_getsockopt(sk, level, optname, optval, optlen); } @@ -1446,7 +1450,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udp_getsockopt(sk, level, optname, optval, optlen); + return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ip_getsockopt(sk, level, optname, optval, optlen); } #endif @@ -1716,6 +1720,8 @@ EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); +EXPORT_SYMBOL(udp_lib_getsockopt); +EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b3ea8af50a9..f52a5c3cc0a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -505,10 +505,11 @@ static void udp_v6_flush_pending_frames(struct sock *sk) * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) +static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udphdr *uh; + struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; @@ -782,7 +783,7 @@ do_append_data: if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) - err = udp_v6_push_pending_frames(sk, up); + err = udp_v6_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; @@ -844,72 +845,12 @@ int udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -static int do_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - struct udp_sock *up = udp_sk(sk); - int val; - int err = 0; - - if(optlencorkflag = 1; - } else { - up->corkflag = 0; - lock_sock(sk); - udp_v6_push_pending_frames(sk, up); - release_sock(sk); - } - break; - case UDP_ENCAP: - switch (val) { - case 0: - up->encap_type = val; - break; - default: - err = -ENOPROTOOPT; - break; - } - break; - - case UDPLITE_SEND_CSCOV: - if (!up->pcflag) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ - val = 8; - up->pcslen = val; - up->pcflag |= UDPLITE_SEND_CC; - break; - - case UDPLITE_RECV_CSCOV: - if (!up->pcflag) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Avoid silly minimal values. */ - val = 8; - up->pcrlen = val; - up->pcflag |= UDPLITE_RECV_CC; - break; - - default: - err = -ENOPROTOOPT; - break; - }; - - return err; -} - int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -918,58 +859,17 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); } #endif -static int do_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct udp_sock *up = udp_sk(sk); - int val, len; - - if(get_user(len,optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - if(len < 0) - return -EINVAL; - - switch(optname) { - case UDP_CORK: - val = up->corkflag; - break; - - case UDP_ENCAP: - val = up->encap_type; - break; - - case UDPLITE_SEND_CSCOV: - val = up->pcslen; - break; - - case UDPLITE_RECV_CSCOV: - val = up->pcrlen; - break; - - default: - return -ENOPROTOOPT; - }; - - if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval, &val,len)) - return -EFAULT; - return 0; -} - int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -978,7 +878,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } #endif -- cgit v1.2.3