summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-05-03 16:20:44 -0700
committerDavid S. Miller <davem@davemloft.net>2010-05-03 16:24:31 -0700
commitf5460618405eec8c3300947a499011528a115acd (patch)
tree19aba5acb6d91d5a20400292aaf1881a6b26c118
parent4f70ecca9c57731b4acbe5043eb22e4416bd2368 (diff)
parent0e3aef8d09a8c11e3fb83cdcb24b5bc7421b3726 (diff)
Merge branch 'net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vxy/lksctp-dev
Add missing linux/vmalloc.h include to net/sctp/probe.c Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sctp/sm.h2
-rw-r--r--include/net/sctp/structs.h66
-rw-r--r--net/sctp/Kconfig12
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/associola.c13
-rw-r--r--net/sctp/chunk.c4
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/output.c27
-rw-r--r--net/sctp/outqueue.c94
-rw-r--r--net/sctp/probe.c214
-rw-r--r--net/sctp/protocol.c7
-rw-r--r--net/sctp/sm_make_chunk.c24
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/sctp/transport.c61
16 files changed, 365 insertions, 176 deletions
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index e9a408718636..65946bc43d00 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -547,7 +547,7 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\
#define WORD_ROUND(s) (((s)+3)&~3)
/* Make a new instance of type. */
-#define t_new(type, flags) (type *)kmalloc(sizeof(type), flags)
+#define t_new(type, flags) (type *)kzalloc(sizeof(type), flags)
/* Compare two timevals. */
#define tv_lt(s, t) \
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 851c813adb3a..273a8bb683e3 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -437,7 +437,7 @@ sctp_vtag_verify_either(const struct sctp_chunk *chunk,
*/
if ((!sctp_test_T_bit(chunk) &&
(ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)) ||
- (sctp_test_T_bit(chunk) &&
+ (sctp_test_T_bit(chunk) && asoc->c.peer_vtag &&
(ntohl(chunk->sctp_hdr->vtag) == asoc->c.peer_vtag))) {
return 1;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 597f8e27aaf6..43257b903c82 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -643,17 +643,15 @@ struct sctp_pf {
struct sctp_datamsg {
/* Chunks waiting to be submitted to lower layer. */
struct list_head chunks;
- /* Chunks that have been transmitted. */
- size_t msg_size;
/* Reference counting. */
atomic_t refcnt;
/* When is this message no longer interesting to the peer? */
unsigned long expires_at;
/* Did the messenge fail to send? */
int send_error;
- char send_failed;
- /* Control whether chunks from this message can be abandoned. */
- char can_abandon;
+ u8 send_failed:1,
+ can_abandon:1, /* can chunks from this message be abandoned? */
+ can_delay:1; /* should this message be Nagle delayed */
};
struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
@@ -757,7 +755,6 @@ struct sctp_chunk {
#define SCTP_NEED_FRTX 0x1
#define SCTP_DONT_FRTX 0x2
__u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
- resent:1, /* Has this chunk ever been resent. */
has_tsn:1, /* Does this chunk have a TSN yet? */
has_ssn:1, /* Does this chunk have a SSN yet? */
singleton:1, /* Only chunk in the packet? */
@@ -879,7 +876,30 @@ struct sctp_transport {
/* Reference counting. */
atomic_t refcnt;
- int dead;
+ int dead:1,
+ /* RTO-Pending : A flag used to track if one of the DATA
+ * chunks sent to this address is currently being
+ * used to compute a RTT. If this flag is 0,
+ * the next DATA chunk sent to this destination
+ * should be used to compute a RTT and this flag
+ * should be set. Every time the RTT
+ * calculation completes (i.e. the DATA chunk
+ * is SACK'd) clear this flag.
+ */
+ rto_pending:1,
+
+ /*
+ * hb_sent : a flag that signals that we have a pending
+ * heartbeat.
+ */
+ hb_sent:1,
+
+ /* Is the Path MTU update pending on this transport */
+ pmtu_pending:1,
+
+ /* Is this structure kfree()able? */
+ malloced:1;
+
/* This is the peer's IP address and port. */
union sctp_addr ipaddr;
@@ -909,22 +929,6 @@ struct sctp_transport {
/* SRTT : The current smoothed round trip time. */
__u32 srtt;
- /* RTO-Pending : A flag used to track if one of the DATA
- * chunks sent to this address is currently being
- * used to compute a RTT. If this flag is 0,
- * the next DATA chunk sent to this destination
- * should be used to compute a RTT and this flag
- * should be set. Every time the RTT
- * calculation completes (i.e. the DATA chunk
- * is SACK'd) clear this flag.
- * hb_sent : a flag that signals that we have a pending heartbeat.
- */
- __u8 rto_pending;
- __u8 hb_sent;
-
- /* Flag to track the current fast recovery state */
- __u8 fast_recovery;
-
/*
* These are the congestion stats.
*/
@@ -944,9 +948,6 @@ struct sctp_transport {
__u32 burst_limited; /* Holds old cwnd when max.burst is applied */
- /* TSN marking the fast recovery exit point */
- __u32 fast_recovery_exit;
-
/* Destination */
struct dst_entry *dst;
/* Source address. */
@@ -977,9 +978,6 @@ struct sctp_transport {
*/
__u16 pathmaxrxt;
- /* is the Path MTU update pending on this tranport */
- __u8 pmtu_pending;
-
/* PMTU : The current known path MTU. */
__u32 pathmtu;
@@ -1023,8 +1021,6 @@ struct sctp_transport {
/* This is the list of transports that have chunks to send. */
struct list_head send_ready;
- int malloced; /* Is this structure kfree()able? */
-
/* State information saved for SFR_CACC algorithm. The key
* idea in SFR_CACC is to maintain state at the sender on a
* per-destination basis when a changeover happens.
@@ -1066,7 +1062,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
struct sctp_sock *);
void sctp_transport_pmtu(struct sctp_transport *);
void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *, int);
+void sctp_transport_reset_timers(struct sctp_transport *);
void sctp_transport_hold(struct sctp_transport *);
void sctp_transport_put(struct sctp_transport *);
void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1720,6 +1716,12 @@ struct sctp_association {
/* Highest TSN that is acknowledged by incoming SACKs. */
__u32 highest_sacked;
+ /* TSN marking the fast recovery exit point */
+ __u32 fast_recovery_exit;
+
+ /* Flag to track the current fast recovery state */
+ __u8 fast_recovery;
+
/* The number of unacknowledged data chunks. Reported through
* the SCTP_STATUS sockopt.
*/
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 58b3e882a187..126b014eb79b 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,6 +37,18 @@ menuconfig IP_SCTP
if IP_SCTP
+config NET_SCTPPROBE
+ tristate "SCTP: Association probing"
+ depends on PROC_FS && KPROBES
+ ---help---
+ This module allows for capturing the changes to SCTP association
+ state in response to incoming packets. It is used for debugging
+ SCTP congestion control algorithms. If you don't understand
+ what was just said, you don't need it: say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called sctp_probe.
+
config SCTP_DBG_MSG
bool "SCTP: Debug messages"
help
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6b794734380a..5c30b7a873df 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_IP_SCTP) += sctp.o
+obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
protocol.o endpointola.o associola.o \
@@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
+sctp_probe-y := probe.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 99c93ee98ad9..3912420cedcc 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Retrieve the SCTP per socket area. */
sp = sctp_sk((struct sock *)sk);
- /* Init all variables to a known value. */
- memset(asoc, 0, sizeof(struct sctp_association));
-
/* Discarding const is appropriate here. */
asoc->ep = (struct sctp_endpoint *)ep;
sctp_endpoint_hold(asoc->ep);
@@ -762,7 +759,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
asoc->peer.retran_path = peer;
}
- if (asoc->peer.active_path == asoc->peer.retran_path) {
+ if (asoc->peer.active_path == asoc->peer.retran_path &&
+ peer->state != SCTP_UNCONFIRMED) {
asoc->peer.retran_path = peer;
}
@@ -1320,12 +1318,13 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
/* Keep track of the next transport in case
* we don't find any active transport.
*/
- if (!next)
+ if (t->state != SCTP_UNCONFIRMED && !next)
next = t;
}
}
- asoc->peer.retran_path = t;
+ if (t)
+ asoc->peer.retran_path = t;
SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
" %p addr: ",
@@ -1485,7 +1484,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
if (asoc->rwnd >= len) {
asoc->rwnd -= len;
if (over) {
- asoc->rwnd_press = asoc->rwnd;
+ asoc->rwnd_press += asoc->rwnd;
asoc->rwnd = 0;
}
} else {
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db59a37..476caaf100ed 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
msg->send_failed = 0;
msg->send_error = 0;
msg->can_abandon = 0;
+ msg->can_delay = 1;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
- msg->msg_size = 0;
}
/* Allocate and initialize datamsg. */
@@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
{
sctp_datamsg_hold(msg);
chunk->msg = msg;
- msg->msg_size += chunk->skb->len;
}
@@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (msg_len >= first_len) {
msg_len -= first_len;
whole = 1;
+ msg->can_delay = 0;
}
/* How many full sized? How many bytes leftover? */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7ec09ba03a1c..e10acc01c75f 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct sctp_shared_key *null_key;
int err;
- memset(ep, 0, sizeof(struct sctp_endpoint));
-
ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
if (!ep->digest)
return NULL;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fad261d41ec2..a646681f5acd 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet)
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
+ /* 6.3.1 C4) When data is in flight and when allowed
+ * by rule C5, a new RTT measurement MUST be made each
+ * round trip. Furthermore, new RTT measurements
+ * SHOULD be made no more than once per round-trip
+ * for a given destination transport address.
+ */
- if (!chunk->resent) {
-
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
-
- if (!tp->rto_pending) {
- chunk->rtt_in_progress = 1;
- tp->rto_pending = 1;
- }
+ if (!tp->rto_pending) {
+ chunk->rtt_in_progress = 1;
+ tp->rto_pending = 1;
}
-
- chunk->resent = 1;
-
has_data = 1;
}
@@ -681,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
* Don't delay large message writes that may have been
* fragmeneted into small peices.
*/
- if ((len < max) && (chunk->msg->msg_size < max)) {
+ if ((len < max) && chunk->msg->can_delay) {
retval = SCTP_XMIT_NAGLE_DELAY;
goto finish;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index abfc0b8dee74..5d057178ce0c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
struct sctp_sackhdr *sack,
- __u32 highest_new_tsn);
+ __u32 *highest_new_tsn);
static void sctp_mark_missing(struct sctp_outq *q,
struct list_head *transmitted_queue,
@@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
/* If it is data, queue it up, otherwise, send it
* immediately.
*/
- if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
+ if (sctp_chunk_is_data(chunk)) {
/* Is it OK to queue data chunks? */
/* From 9. Termination of Association
*
@@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
if (fast_rtx && !chunk->fast_retransmit)
continue;
+redo:
/* Attempt to append this chunk to the packet. */
status = sctp_packet_append_chunk(pkt, chunk);
switch (status) {
case SCTP_XMIT_PMTU_FULL:
+ if (!pkt->has_data && !pkt->has_cookie_echo) {
+ /* If this packet did not contain DATA then
+ * retransmission did not happen, so do it
+ * again. We'll ignore the error here since
+ * control chunks are already freed so there
+ * is nothing we can do.
+ */
+ sctp_packet_transmit(pkt);
+ goto redo;
+ }
+
/* Send this packet. */
error = sctp_packet_transmit(pkt);
@@ -647,14 +659,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
if (chunk->fast_retransmit == SCTP_NEED_FRTX)
chunk->fast_retransmit = SCTP_DONT_FRTX;
- /* Force start T3-rtx timer when fast retransmitting
- * the earliest outstanding TSN
- */
- if (!timer && fast_rtx &&
- ntohl(chunk->subh.data_hdr->tsn) ==
- asoc->ctsn_ack_point + 1)
- timer = 2;
-
q->empty = 0;
break;
}
@@ -854,6 +858,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
if (status != SCTP_XMIT_OK) {
/* put the chunk back */
list_add(&chunk->list, &q->control_chunk_list);
+ } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+ /* PR-SCTP C5) If a FORWARD TSN is sent, the
+ * sender MUST assure that at least one T3-rtx
+ * timer is running.
+ */
+ sctp_transport_reset_timers(transport);
}
break;
@@ -906,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
rtx_timeout, &start_timer);
if (start_timer)
- sctp_transport_reset_timers(transport,
- start_timer-1);
+ sctp_transport_reset_timers(transport);
/* This can happen on COOKIE-ECHO resend. Only
* one chunk can get bundled with a COOKIE-ECHO.
@@ -1040,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
list_add_tail(&chunk->transmitted_list,
&transport->transmitted);
- sctp_transport_reset_timers(transport, 0);
+ sctp_transport_reset_timers(transport);
q->empty = 0;
@@ -1100,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
assoc->unack_data = unack_data;
}
-/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
-static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
- struct sctp_association *asoc)
-{
- struct sctp_transport *transport;
- struct sctp_chunk *chunk;
- __u32 highest_new_tsn, tsn;
- struct list_head *transport_list = &asoc->peer.transport_addr_list;
-
- highest_new_tsn = ntohl(sack->cum_tsn_ack);
-
- list_for_each_entry(transport, transport_list, transports) {
- list_for_each_entry(chunk, &transport->transmitted,
- transmitted_list) {
- tsn = ntohl(chunk->subh.data_hdr->tsn);
-
- if (!chunk->tsn_gap_acked &&
- TSN_lt(highest_new_tsn, tsn) &&
- sctp_acked(sack, tsn))
- highest_new_tsn = tsn;
- }
- }
-
- return highest_new_tsn;
-}
-
/* This is where we REALLY process a SACK.
*
* Process the SACK against the outqueue. Mostly, this just frees
@@ -1145,6 +1128,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
struct sctp_transport *primary = asoc->peer.primary_path;
int count_of_newacks = 0;
int gap_ack_blocks;
+ u8 accum_moved = 0;
/* Grab the association's destination address list. */
transport_list = &asoc->peer.transport_addr_list;
@@ -1193,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
if (gap_ack_blocks)
highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
- if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
- highest_new_tsn = highest_tsn;
+ if (TSN_lt(asoc->highest_sacked, highest_tsn))
asoc->highest_sacked = highest_tsn;
- } else {
- highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
- }
+ highest_new_tsn = sack_ctsn;
/* Run through the retransmit queue. Credit bytes received
* and free those chunks that we can.
*/
- sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
+ sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
/* Run through the transmitted queue.
* Credit bytes received and free those chunks which we can.
@@ -1213,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
*/
list_for_each_entry(transport, transport_list, transports) {
sctp_check_transmitted(q, &transport->transmitted,
- transport, sack, highest_new_tsn);
+ transport, sack, &highest_new_tsn);
/*
* SFR-CACC algorithm:
* C) Let count_of_newacks be the number of
@@ -1223,16 +1204,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
count_of_newacks ++;
}
+ /* Move the Cumulative TSN Ack Point if appropriate. */
+ if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
+ asoc->ctsn_ack_point = sack_ctsn;
+ accum_moved = 1;
+ }
+
if (gap_ack_blocks) {
+
+ if (asoc->fast_recovery && accum_moved)
+ highest_new_tsn = highest_tsn;
+
list_for_each_entry(transport, transport_list, transports)
sctp_mark_missing(q, &transport->transmitted, transport,
highest_new_tsn, count_of_newacks);
}
- /* Move the Cumulative TSN Ack Point if appropriate. */
- if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
- asoc->ctsn_ack_point = sack_ctsn;
-
/* Update unack_data field in the assoc. */
sctp_sack_update_unack_data(asoc, sack);
@@ -1315,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
struct sctp_sackhdr *sack,
- __u32 highest_new_tsn_in_sack)
+ __u32 *highest_new_tsn_in_sack)
{
struct list_head *lchunk;
struct sctp_chunk *tchunk;
@@ -1387,7 +1374,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* instance).
*/
if (!tchunk->tsn_gap_acked &&
- !tchunk->resent &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
@@ -1404,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
*/
if (!tchunk->tsn_gap_acked) {
tchunk->tsn_gap_acked = 1;
+ *highest_new_tsn_in_sack = tsn;
bytes_acked += sctp_data_size(tchunk);
if (!tchunk->transport)
migrate_bytes += sctp_data_size(tchunk);
@@ -1677,7 +1664,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
struct sctp_chunk *chunk;
__u32 tsn;
char do_fast_retransmit = 0;
- struct sctp_transport *primary = q->asoc->peer.primary_path;
+ struct sctp_association *asoc = q->asoc;
+ struct sctp_transport *primary = asoc->peer.primary_path;
list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
new file mode 100644
index 000000000000..db3a42b8b349
--- /dev/null
+++ b/net/sctp/probe.c
@@ -0,0 +1,214 @@
+/*
+ * sctp_probe - Observe the SCTP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for SCTP from Stephen Hemminger's code
+ * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
+MODULE_DESCRIPTION("SCTP snooper");
+MODULE_LICENSE("GPL");
+
+/*
+ * Module parameters.
+ *
+ * port:    when non-zero, the probe handler only logs associations whose
+ *          peer port or local bind port equals this value.
+ * bufsize: size handed to kfifo_alloc() for the log FIFO at module init.
+ * full:    when non-zero every probed call is logged; when zero a record
+ *          is only written if the primary path's cwnd differs from the
+ *          last logged value.
+ */
+static int port __read_mostly = 0;
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+static int bufsize __read_mostly = 64 * 1024;
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+static int full __read_mostly = 1;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
+module_param(full, int, 0);
+
+/* Name of the proc entry created in init_net by sctpprobe_init(). */
+static const char procname[] = "sctpprobe";
+
+/*
+ * Global probe state shared by the jprobe handler (producer) and the
+ * proc file reader (consumer).
+ */
+static struct {
+ /* Log text: filled by printl(), drained by sctpprobe_read(). */
+ struct kfifo fifo;
+ /* Lock passed to kfifo_in_locked()/kfifo_out_locked(). */
+ spinlock_t lock;
+ /* Readers sleep here until the FIFO is non-empty. */
+ wait_queue_head_t wait;
+ /* Time of the last open(); log timestamps are relative to it. */
+ struct timespec tstart;
+} sctpw;
+
+/*
+ * printl - format a fragment of log text and queue it for readers.
+ * @fmt: printf-style format string, followed by its arguments.
+ *
+ * The text is formatted into a 256-byte stack buffer (vscnprintf() bounds
+ * the output to that size), copied into the log FIFO under sctpw.lock and
+ * any reader sleeping on sctpw.wait is woken.  The number of bytes actually
+ * queued is not checked; this is best-effort logging and nothing is retried
+ * here if the FIFO has no room.
+ *
+ * The format attribute lets the compiler check callers' arguments against
+ * their format strings.
+ */
+static __attribute__((format(printf, 1, 2)))
+void printl(const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	char tbuf[256];
+
+	va_start(args, fmt);
+	len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
+	va_end(args);
+
+	kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	wake_up(&sctpw.wait);
+}
+
+/*
+ * sctpprobe_open - open handler for the proc entry.
+ *
+ * Discards any log text still queued and restarts the reference time that
+ * log timestamps are measured from.  Always returns 0.
+ *
+ * The reset is done under sctpw.lock because the probe handler may be
+ * inside kfifo_in_locked() on another CPU; resetting the FIFO indices
+ * while a writer is updating them could corrupt the FIFO state.
+ */
+static int sctpprobe_open(struct inode *inode, struct file *file)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sctpw.lock, flags);
+	kfifo_reset(&sctpw.fifo);
+	spin_unlock_irqrestore(&sctpw.lock, flags);
+
+	getnstimeofday(&sctpw.tstart);
+
+	return 0;
+}
+
+/*
+ * sctpprobe_read - read handler for the proc entry.
+ * @file: unused.
+ * @buf:  user buffer to fill; NULL yields -EINVAL.
+ * @len:  size of @buf; 0 yields an immediate 0.
+ * @ppos: unused (the log is a stream).
+ *
+ * Sleeps (interruptibly) until the log FIFO holds data, then moves up to
+ * @len bytes through a temporary kernel buffer into @buf.
+ *
+ * Returns the number of bytes copied, -ENOMEM if the bounce buffer cannot
+ * be allocated, -EFAULT if the copy to user space fails, or the error from
+ * wait_event_interruptible() if the sleep is interrupted.
+ */
+static ssize_t sctpprobe_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+	unsigned char *tbuf;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (len == 0)
+		return 0;
+
+	/*
+	 * A single read can never return more than the FIFO can hold, so
+	 * do not let a huge user-supplied length drive the allocation.
+	 */
+	len = min_t(size_t, len, kfifo_size(&sctpw.fifo));
+
+	tbuf = vmalloc(len);
+	if (!tbuf)
+		return -ENOMEM;
+
+	error = wait_event_interruptible(sctpw.wait,
+					 kfifo_len(&sctpw.fifo) != 0);
+	if (error)
+		goto out_free;
+
+	cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
+
+out_free:
+	vfree(tbuf);
+
+	return error ? error : cnt;
+}
+
+/*
+ * File operations for the proc entry: only open and read are provided.
+ */
+static const struct file_operations sctpprobe_fops = {
+ .owner = THIS_MODULE,
+ .open = sctpprobe_open,
+ .read = sctpprobe_read,
+};
+
+/*
+ * jsctp_sf_eat_sack - jprobe entry handler, registered below for the
+ * symbol "sctp_sf_eat_sack_6_2".
+ *
+ * Writes one text record per logged call via printl():
+ *   <sec>.<usec since open> <asoc ptr> <bind port> <peer port>
+ *   <asoc pathmtu> <peer rwnd> <unack_data>
+ * followed, for every transport of the association, by its address
+ * (prefixed with '*' for the primary path), state, cwnd, ssthresh,
+ * flight_size, partial_bytes_acked and pathmtu, then a newline.
+ *
+ * A record is written only when the port filter matches (or is 0) and
+ * either "full" is set or the primary path's cwnd changed since the last
+ * record.  Note that lcwnd is a single function-static value, so that
+ * comparison is shared by all associations.
+ *
+ * The handler always ends with jprobe_return(); the return statement
+ * after it only satisfies the function's return type.
+ */
+sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const sctp_subtype_t type,
+ void *arg,
+ sctp_cmd_seq_t *commands)
+{
+ struct sctp_transport *sp;
+ static __u32 lcwnd = 0;
+ struct timespec now;
+
+ sp = asoc->peer.primary_path;
+
+ if ((full || sp->cwnd != lcwnd) &&
+ (!port || asoc->peer.port == port ||
+ ep->base.bind_addr.port == port)) {
+ lcwnd = sp->cwnd;
+
+ /* Timestamp relative to the last open() of the proc file. */
+ getnstimeofday(&now);
+ now = timespec_sub(now, sctpw.tstart);
+
+ printl("%lu.%06lu ", (unsigned long) now.tv_sec,
+ (unsigned long) now.tv_nsec / NSEC_PER_USEC);
+
+ /* Association-wide fields. */
+ printl("%p %5d %5d %5d %8d %5d ", asoc,
+ ep->base.bind_addr.port, asoc->peer.port,
+ asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
+
+ /* One group of fields per peer transport; sp is reused as cursor. */
+ list_for_each_entry(sp, &asoc->peer.transport_addr_list,
+ transports) {
+ if (sp == asoc->peer.primary_path)
+ printl("*");
+
+ if (sp->ipaddr.sa.sa_family == AF_INET)
+ printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
+ else
+ printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
+
+ printl("%2u %8u %8u %8u %8u %8u ",
+ sp->state, sp->cwnd, sp->ssthresh,
+ sp->flight_size, sp->partial_bytes_acked,
+ sp->pathmtu);
+ }
+ printl("\n");
+ }
+
+ jprobe_return();
+ return 0;
+}
+
+/*
+ * jprobe descriptor: attaches jsctp_sf_eat_sack() to the kernel symbol
+ * "sctp_sf_eat_sack_6_2".  Registered in sctpprobe_init() and
+ * unregistered in sctpprobe_exit().
+ */
+static struct jprobe sctp_recv_probe = {
+ .kp = {
+ .symbol_name = "sctp_sf_eat_sack_6_2",
+ },
+ .entry = jsctp_sf_eat_sack,
+};
+
+/*
+ * sctpprobe_init - module entry point.
+ *
+ * Sets up the wait queue, lock and log FIFO, creates the proc entry in
+ * init_net and finally registers the jprobe.  On failure everything set
+ * up so far is torn down in reverse order.
+ *
+ * Returns 0 on success, -EINVAL for a non-positive bufsize parameter,
+ * the kfifo_alloc() error if the FIFO cannot be allocated, -ENOMEM if the
+ * proc entry cannot be created, or the register_jprobe() error.
+ */
+static __init int sctpprobe_init(void)
+{
+	int ret;
+
+	/* bufsize is a signed module parameter; reject nonsensical sizes. */
+	if (bufsize <= 0)
+		return -EINVAL;
+
+	init_waitqueue_head(&sctpw.wait);
+	spin_lock_init(&sctpw.lock);
+
+	ret = kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
+	if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
+				  &sctpprobe_fops))
+		goto free_kfifo;
+
+	ret = register_jprobe(&sctp_recv_probe);
+	if (ret)
+		goto remove_proc;
+
+	pr_info("SCTP probe registered (port=%d)\n", port);
+
+	return 0;
+
+remove_proc:
+	proc_net_remove(&init_net, procname);
+free_kfifo:
+	kfifo_free(&sctpw.fifo);
+	return ret;
+}
+
+/*
+ * sctpprobe_exit - module exit point.
+ *
+ * Tears down in the reverse order of sctpprobe_init(): the jprobe is
+ * unregistered first so the handler can no longer write to the FIFO, the
+ * proc entry is removed next, and only then is the FIFO freed.  Freeing
+ * the FIFO while the probe is still registered would let the handler
+ * write into freed memory.
+ */
+static __exit void sctpprobe_exit(void)
+{
+	unregister_jprobe(&sctp_recv_probe);
+	proc_net_remove(&init_net, procname);
+	kfifo_free(&sctpw.fifo);
+}
+
+module_init(sctpprobe_init);
+module_exit(sctpprobe_exit);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 704298f4b284..182749867c72 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
memset(&fl, 0x0, sizeof(struct flowi));
fl.fl4_dst = daddr->v4.sin_addr.s_addr;
+ fl.fl_ip_dport = daddr->v4.sin_port;
fl.proto = IPPROTO_SCTP;
if (asoc) {
fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
fl.oif = asoc->base.sk->sk_bound_dev_if;
+ fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
}
- if (saddr)
+ if (saddr) {
fl.fl4_src = saddr->v4.sin_addr.s_addr;
+ fl.fl_ip_sport = saddr->v4.sin_port;
+ }
SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
__func__, &fl.fl4_dst, &fl.fl4_src);
@@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
if ((laddr->state == SCTP_ADDR_SRC) &&
(AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+ fl.fl_ip_sport = laddr->a.v4.sin_port;
if (!ip_route_output_key(&init_net, &rt, &fl)) {
dst = &rt->u.dst;
goto out_unlock;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 30c1767186b8..d8261f3d7715 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -445,10 +445,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
if (!retval)
goto nomem_chunk;
- /* Per the advice in RFC 2960 6.4, send this reply to
- * the source of the INIT packet.
+ /* RFC 2960 6.4 Multi-homed SCTP Endpoints
+ *
+ * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+ * HEARTBEAT ACK, etc.) to the same destination transport
+ * address from which it received the DATA or control chunk
+ * to which it is replying.
+ *
+ * [INIT ACK back to where the INIT came from.]
*/
retval->transport = chunk->transport;
+
retval->subh.init_hdr =
sctp_addto_chunk(retval, sizeof(initack), &initack);
retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
@@ -487,18 +494,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
/* We need to remove the const qualifier at this point. */
retval->asoc = (struct sctp_association *) asoc;
- /* RFC 2960 6.4 Multi-homed SCTP Endpoints
- *
- * An endpoint SHOULD transmit reply chunks (e.g., SACK,
- * HEARTBEAT ACK, * etc.) to the same destination transport
- * address from which it received the DATA or control chunk
- * to which it is replying.
- *
- * [INIT ACK back to where the INIT came from.]
- */
- if (chunk)
- retval->transport = chunk->transport;
-
nomem_chunk:
kfree(cookie);
nomem_cookie:
@@ -1254,7 +1249,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
INIT_LIST_HEAD(&retval->list);
retval->skb = skb;
retval->asoc = (struct sctp_association *)asoc;
- retval->resent = 0;
retval->has_tsn = 0;
retval->has_ssn = 0;
retval->rtt_in_progress = 0;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d5ae450b6f02..3b7230ef77c2 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
{
struct sctp_transport *t;
- t = sctp_assoc_choose_alter_transport(asoc,
+ if (chunk->transport)
+ t = chunk->transport;
+ else {
+ t = sctp_assoc_choose_alter_transport(asoc,
asoc->shutdown_last_sent_to);
+ chunk->transport = t;
+ }
asoc->shutdown_last_sent_to = t;
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
- chunk->transport = t;
}
/* Helper function to change the state of an association. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bae764005d2d..ba1add0b13c3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4384,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
transports) {
memcpy(&temp, &from->ipaddr, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
- addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
+ addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if (space_left < addrlen)
return -ENOMEM;
if (copy_to_user(to, &temp, addrlen))
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index be4d63d5a5cc..fccf4947aff1 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
/* Copy in the address. */
peer->ipaddr = *addr;
peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
- peer->asoc = NULL;
-
- peer->dst = NULL;
memset(&peer->saddr, 0, sizeof(union sctp_addr));
/* From 6.3.1 RTO Calculation:
@@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
* parameter 'RTO.Initial'.
*/
peer->rto = msecs_to_jiffies(sctp_rto_initial);
- peer->rtt = 0;
- peer->rttvar = 0;
- peer->srtt = 0;
- peer->rto_pending = 0;
- peer->hb_sent = 0;
- peer->fast_recovery = 0;
peer->last_time_heard = jiffies;
peer->last_time_ecne_reduced = jiffies;
- peer->init_sent_count = 0;
-
peer->param_flags = SPP_HB_DISABLE |
SPP_PMTUD_ENABLE |
SPP_SACKDELAY_ENABLE;
- peer->hbinterval = 0;
/* Initialize the default path max_retrans. */
peer->pathmaxrxt = sctp_max_retrans_path;
- peer->error_count = 0;
INIT_LIST_HEAD(&peer->transmitted);
INIT_LIST_HEAD(&peer->send_ready);
INIT_LIST_HEAD(&peer->transports);
- peer->T3_rtx_timer.expires = 0;
- peer->hb_timer.expires = 0;
-
setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
(unsigned long)peer);
setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
@@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
atomic_set(&peer->refcnt, 1);
- peer->dead = 0;
-
- peer->malloced = 0;
-
- /* Initialize the state information for SFR-CACC */
- peer->cacc.changeover_active = 0;
- peer->cacc.cycling_changeover = 0;
- peer->cacc.next_tsn_at_change = 0;
- peer->cacc.cacc_saw_newack = 0;
return peer;
}
@@ -195,7 +170,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
/* Start T3_rtx timer if it is not already running and update the heartbeat
* timer. This routine is called every time a DATA chunk is sent.
*/
-void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
+void sctp_transport_reset_timers(struct sctp_transport *transport)
{
/* RFC 2960 6.3.2 Retransmission Timer Rules
*
@@ -205,7 +180,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
* address.
*/
- if (force || !timer_pending(&transport->T3_rtx_timer))
+ if (!timer_pending(&transport->T3_rtx_timer))
if (!mod_timer(&transport->T3_rtx_timer,
jiffies + transport->rto))
sctp_transport_hold(transport);
@@ -403,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
void sctp_transport_raise_cwnd(struct sctp_transport *transport,
__u32 sack_ctsn, __u32 bytes_acked)
{
+ struct sctp_association *asoc = transport->asoc;
__u32 cwnd, ssthresh, flight_size, pba, pmtu;
cwnd = transport->cwnd;
flight_size = transport->flight_size;
/* See if we need to exit Fast Recovery first */
- if (transport->fast_recovery &&
- TSN_lte(transport->fast_recovery_exit, sack_ctsn))
- transport->fast_recovery = 0;
+ if (asoc->fast_recovery &&
+ TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
+ asoc->fast_recovery = 0;
/* The appropriate cwnd increase algorithm is performed if, and only
* if the cumulative TSN whould advanced and the congestion window is
@@ -440,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
* 2) the destination's path MTU. This upper bound protects
* against the ACK-Splitting attack outlined in [SAVAGE99].
*/
- if (transport->fast_recovery)
+ if (asoc->fast_recovery)
return;
if (bytes_acked > pmtu)
@@ -491,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
sctp_lower_cwnd_t reason)
{
+ struct sctp_association *asoc = transport->asoc;
+
switch (reason) {
case SCTP_LOWER_CWND_T3_RTX:
/* RFC 2960 Section 7.2.3, sctpimpguide
@@ -501,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* partial_bytes_acked = 0
*/
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
- transport->cwnd = transport->asoc->pathmtu;
+ 4*asoc->pathmtu);
+ transport->cwnd = asoc->pathmtu;
- /* T3-rtx also clears fast recovery on the transport */
- transport->fast_recovery = 0;
+ /* T3-rtx also clears fast recovery */
+ asoc->fast_recovery = 0;
break;
case SCTP_LOWER_CWND_FAST_RTX:
@@ -521,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* cwnd = ssthresh
* partial_bytes_acked = 0
*/
- if (transport->fast_recovery)
+ if (asoc->fast_recovery)
return;
/* Mark Fast recovery */
- transport->fast_recovery = 1;
- transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+ asoc->fast_recovery = 1;
+ asoc->fast_recovery_exit = asoc->next_tsn - 1;
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
transport->cwnd = transport->ssthresh;
break;
@@ -549,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
if (time_after(jiffies, transport->last_time_ecne_reduced +
transport->rtt)) {
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
transport->cwnd = transport->ssthresh;
transport->last_time_ecne_reduced = jiffies;
}
@@ -565,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* interval.
*/
transport->cwnd = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
break;
}
@@ -650,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t)
t->error_count = 0;
t->rto_pending = 0;
t->hb_sent = 0;
- t->fast_recovery = 0;
/* Initialize the state information for SFR-CACC */
t->cacc.changeover_active = 0;