From 475cba4ec8ee6b427cc3567692e6f48dd483c069 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:52 -0500 Subject: sctp: implement definition for SACK-IMMEDIATELY extension This patch implement the definition for SACK-IMMEDIATELY extension. Section 3. The I-bit in the DATA Chunk Header The following Figure 1 shows the extended DATA chunk. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type = 0 | Res |I|U|B|E| Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | TSN | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Stream Identifier | Stream Sequence Number | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Payload Protocol Identifier | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ \ \ / User Data / \ \ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Figure 1 The only difference between the DATA chunk in Figure 1 and the DATA chunk defined in [RFC4960] is the addition of the I-bit in the flags field of the chunk header. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/linux/sctp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b464b9d3d242..c20d3ce673c0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -242,6 +242,7 @@ enum { SCTP_DATA_FIRST_FRAG = 0x02, SCTP_DATA_NOT_FRAG = 0x03, SCTP_DATA_UNORDERED = 0x04, + SCTP_DATA_SACK_IMM = 0x08, }; enum { SCTP_DATA_FRAG_MASK = 0x03, }; -- cgit v1.2.3-59-g8ed1b From 6dc7694f9df20f148076d82d00cb3663afb0b000 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:53 -0500 Subject: sctp: implement the receiver side for SACK-IMMEDIATELY extension This patch implement the receiver side for SACK-IMMEDIATELY extension: Section 4.2. Receiver Side Considerations On reception of an SCTP packet containing a DATA chunk with the I-bit set, the receiver SHOULD NOT delay the sending of the corresponding SACK chunk and SHOULD send it back immediately. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d4df45022ffa..8ee24c9dc7e9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2868,6 +2868,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; + sctp_arg_t force = SCTP_NOFORCE(); int error; if (!sctp_vtag_verify(chunk, asoc)) { @@ -2901,6 +2902,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, BUG(); } + if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) + force = SCTP_FORCE(); + if (asoc->autoclose) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2929,7 +2933,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * more aggressive than the following algorithms allow. */ if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_CONSUME; @@ -2954,7 +2958,7 @@ discard_force: discard_noforce: if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_DISCARD; consume: -- cgit v1.2.3-59-g8ed1b From b93d6471748de2ce02cc24774b774deb306a57a8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: implement the sender side for SACK-IMMEDIATELY extension This patch implement the sender side for SACK-IMMEDIATELY extension. Section 4.1. Sender Side Considerations Whenever the sender of a DATA chunk can benefit from the corresponding SACK chunk being sent back without delay, the sender MAY set the I-bit in the DATA chunk header. Reasons for setting the I-bit include o The sender is in the SHUTDOWN-PENDING state. o The application requests to set the I-bit of the last DATA chunk of a user message when providing the user message to the SCTP implementation. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/net/sctp/user.h | 1 + net/sctp/chunk.c | 15 ++++++++++++++- net/sctp/outqueue.c | 7 +++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index be2334aaf52e..50b2431405f2 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -206,6 +206,7 @@ enum sctp_sinfo_flags { SCTP_UNORDERED = 1, /* Send/receive message unordered. */ SCTP_ADDR_OVER = 2, /* Override the primary destination. */ SCTP_ABORT=4, /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index acf7c4d128f7..8e4320040f05 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -263,9 +263,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (0 == i) frag |= SCTP_DATA_FIRST_FRAG; - if ((i == (whole - 1)) && !over) + if ((i == (whole - 1)) && !over) { frag |= SCTP_DATA_LAST_FRAG; + /* The application requests to set the I-bit of the + * last DATA chunk of a user message when providing + * the user message to the SCTP implementation. + */ + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + } + chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); if (!chunk) @@ -297,6 +306,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, else frag = SCTP_DATA_LAST_FRAG; + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); if (!chunk) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521b..5732661c87d3 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1011,6 +1011,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) break; case SCTP_XMIT_OK: + /* The sender is in the SHUTDOWN-PENDING state, + * The sender MAY set the I-bit in the DATA + * chunk header. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) + chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM; + break; default: -- cgit v1.2.3-59-g8ed1b From 6383cfb3ed3c5c0bea06da0099c219ef4237ecf5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: Fix malformed "Invalid Stream Identifier" error The "Invalid Stream Identifier" error has a 16 bit reserved field at the end, thus making the parameter length be 8 bytes. We've never supplied that reserved field making wireshark tag the packet as malformed. Reported-by: Chris Dischino Signed-off-by: Vlad Yasevich --- include/net/sctp/sm.h | 3 ++- net/sctp/sm_make_chunk.c | 13 +++++++++---- net/sctp/sm_statefuns.c | 13 ++++++++----- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index c1dd89365833..851c813adb3a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -243,7 +243,8 @@ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen); + size_t paylen, + size_t reserve_tail); struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, union sctp_addr *, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9d881a61ac02..9e732916b671 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -987,7 +987,10 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, target = skb_put(chunk->skb, len); - memcpy(target, data, len); + if (data) + memcpy(target, data, len); + else + memset(target, 0, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + len); @@ -1129,16 +1132,18 @@ nodata: struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen) + size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; - retval = sctp_make_op_error_space(asoc, chunk, paylen); + retval = sctp_make_op_error_space(asoc, chunk, paylen + reserve_tail); if (!retval) goto nodata; - sctp_init_cause(retval, cause_code, paylen); + sctp_init_cause(retval, cause_code, paylen + reserve_tail); sctp_addto_chunk(retval, paylen, payload); + if (reserve_tail) + sctp_addto_param(retval, reserve_tail, NULL); nodata: return retval; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8ee24c9dc7e9..16a603527df2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1720,7 +1720,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_COOKIE_IN_SHUTDOWN, - NULL, 0); + NULL, 0, 0); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); @@ -3977,7 +3977,7 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, err_chunk = sctp_make_op_error(asoc, chunk, SCTP_ERROR_UNSUP_HMAC, &auth_hdr->hmac_id, - sizeof(__u16)); + sizeof(__u16), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4069,7 +4069,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4088,7 +4089,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -6052,7 +6054,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, - sizeof(data_hdr->stream)); + sizeof(data_hdr->stream), + sizeof(u16)); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); -- cgit v1.2.3-59-g8ed1b From e0e9db178a5ba4dbb5f16f958f1affbdc63d2cc4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Select a working primary during sctp_connectx() When sctp_connectx() is used, we pick the first address as primary, even though it may not have worked. This results in excessive retransmits and poor performance. We should select the address that the association was established with. Reported-by: Thomas Dreibholz Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 6 ++++++ net/sctp/sm_sideeffect.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 7eed77a39d0d..8e755ebff3b8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -512,7 +512,13 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * to this destination address earlier. The sender MUST set * CYCLING_CHANGEOVER to indicate that this switch is a * double switch to the same destination address. + * + * Really, only bother is we have data queued or outstanding on + * the association. */ + if (!asoc->outqueue.outstanding_bytes && !asoc->outqueue.out_qlen) + return; + if (transport->cacc.changeover_active) transport->cacc.cycling_changeover = changeover; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..eda4fe783be5 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1418,6 +1418,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; + /* Set the new transport as primary */ + sctp_assoc_set_primary(asoc, t); break; case SCTP_CMD_INIT_RESTART: -- cgit v1.2.3-59-g8ed1b From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 4 ++++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 5 +++-- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +-- net/sctp/sysctl.c | 13 +++++++++++++ 6 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 58f714a3b670..63908840eef0 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -308,6 +308,10 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ +#define SCTP_DEFAULT_RWND_SHIFT 4 /* by default, update on 1/16 of + * rcvbuf, which is 1/8 of initial + * window + */ #define SCTP_DEFAULT_MAXSEGMENT 1500 /* MTU size, this is the limit * to which we will raise the P-MTU. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f81..90876b657775 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -231,6 +231,11 @@ extern struct sctp_globals { /* Flag to indicate whether computing and verifying checksum * is disabled. */ int checksum_disable; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_update_shift; } sctp_globals; #define sctp_rto_initial (sctp_globals.rto_initial) @@ -267,6 +272,7 @@ extern struct sctp_globals { #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) +#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) /* SCTP Socket type: UDP or TCP style. */ typedef enum { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8e755ebff3b8..37e982510bea 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1383,8 +1383,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 08ef203d36ac..a3c8988758b1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eda4fe783be5..8ae67098e094 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120f..ae03ded2bf1a 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -284,6 +285,18 @@ static ctl_table sctp_table[] = { .extra1 = &zero, .extra2 = &addr_scope_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + { .ctl_name = 0 } }; -- cgit v1.2.3-59-g8ed1b From 37051f73862d15862aecc08b887b2884137af327 Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: allow setting path_maxrxt independent of SPP_PMTUD_ENABLE Since draft-ietf-tsvwg-sctpsocket-15.txt, setting the SPP_MTUD_ENABLE flag when changing pathmaxrxt via the SCTP_PEER_ADDR_PARAMS setsockopt is not required any longer. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 66b1f02b17ba..a4577a75c6c0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2311,11 +2311,10 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, } } - /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value - * of this field is ignored. Note also that a value of zero - * indicates the current setting should be left unchanged. + /* Note that a value of zero indicates the current setting should be + left unchanged. */ - if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) { + if (params->spp_pathmaxrxt) { if (trans) { trans->pathmaxrxt = params->spp_pathmaxrxt; } else if (asoc) { -- cgit v1.2.3-59-g8ed1b From a242b41dedfe0fd51ab1c906daa703c09b196744 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: remove deprecated SCTP_GET_*_OLD stuffs SCTP_GET_*_OLD stuffs are schedlued to be removed. Cc: Vlad Yasevich Signed-off-by: WANG Cong Signed-off-by: Vlad Yasevich --- Documentation/feature-removal-schedule.txt | 12 -- include/net/sctp/user.h | 8 - net/sctp/socket.c | 325 ----------------------------- 3 files changed, 345 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bc693fffabe0..72ae06f73acf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -302,18 +302,6 @@ Who: ocfs2-devel@oss.oracle.com --------------------------- -What: SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD, - SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD -When: June 2009 -Why: A newer version of the options have been introduced in 2005 that - removes the limitions of the old API. The sctp library has been - converted to use these new options at the same time. Any user - space app that directly uses the old options should convert to using - the new options. -Who: Vlad Yasevich - ---------------------------- - What: Ability for non root users to shm_get hugetlb pages based on mlock resource limits When: 2.6.31 diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 50b2431405f2..fceab4d2413e 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -131,14 +131,6 @@ enum sctp_optname { #define SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_PEELOFF, /* peel off association. */ #define SCTP_SOCKOPT_PEELOFF SCTP_SOCKOPT_PEELOFF - SCTP_GET_PEER_ADDRS_NUM_OLD, /* Get number of peer addresss. */ -#define SCTP_GET_PEER_ADDRS_NUM_OLD SCTP_GET_PEER_ADDRS_NUM_OLD - SCTP_GET_PEER_ADDRS_OLD, /* Get all peer addresss. */ -#define SCTP_GET_PEER_ADDRS_OLD SCTP_GET_PEER_ADDRS_OLD - SCTP_GET_LOCAL_ADDRS_NUM_OLD, /* Get number of local addresss. */ -#define SCTP_GET_LOCAL_ADDRS_NUM_OLD SCTP_GET_LOCAL_ADDRS_NUM_OLD - SCTP_GET_LOCAL_ADDRS_OLD, /* Get all local addresss. */ -#define SCTP_GET_LOCAL_ADDRS_OLD SCTP_GET_LOCAL_ADDRS_OLD SCTP_SOCKOPT_CONNECTX_OLD, /* CONNECTX old requests. */ #define SCTP_SOCKOPT_CONNECTX_OLD SCTP_SOCKOPT_CONNECTX_OLD SCTP_GET_PEER_ADDRS, /* Get all peer addresss. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a4577a75c6c0..d2681a6bc6fa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4348,90 +4348,6 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_association *asoc; - struct list_head *pos; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " - "socket option deprecated\n"); - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - - list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; - } - - return cnt; -} - -/* - * Old API for getting list of peer addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_transport *from; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0) return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " - "socket option deprecated\n"); - - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - - to = (void __user *)getaddrs.addrs; - list_for_each_entry(from, &asoc->peer.transport_addr_list, - transports) { - memcpy(&temp, &from->ipaddr, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) - return -EFAULT; - to += addrlen ; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - getaddrs.addr_num = cnt; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &getaddrs, len)) - return -EFAULT; - - return 0; -} static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -4484,125 +4400,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return 0; } -static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - struct sctp_sockaddr_entry *addr; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - rcu_read_lock(); - list_for_each_entry_rcu(addr, - &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - - cnt++; - } - rcu_read_unlock(); - } else { - cnt = 1; - } - goto done; - } - - /* Protection on the bound address list is not needed, - * since in the socket option context we hold the socket lock, - * so there is no way that the bound address list can change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - cnt ++; - } -done: - return cnt; -} - -/* Helper function that copies local addresses to user and returns the number - * of addresses copied. - */ -static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, - int max_addrs, void *to, - int *bytes_copied) -{ - struct sctp_sockaddr_entry *addr; - union sctp_addr temp; - int cnt = 0; - int addrlen; - - rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - memcpy(&temp, &addr->a, sizeof(temp)); - if (!temp.v4.sin_port) - temp.v4.sin_port = htons(port); - - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), - &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(to, &temp, addrlen); - - to += addrlen; - *bytes_copied += addrlen; - cnt ++; - if (cnt >= max_addrs) break; - } - rcu_read_unlock(); - - return cnt; -} - static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { @@ -4646,112 +4443,6 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, return cnt; } -/* Old API for getting list of local addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_sockaddr_entry *addr; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - int err = 0; - void *addrs; - void *buf; - int bytes_copied = 0; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0 || - getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) - return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == getaddrs.assoc_id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - to = getaddrs.addrs; - - /* Allocate space for a local instance of packed array to hold all - * the data. We store addresses here first and then put write them - * to the user in one shot. - */ - addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num, - GFP_KERNEL); - if (!addrs) - return -ENOMEM; - - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - cnt = sctp_copy_laddrs_old(sk, bp->port, - getaddrs.addr_num, - addrs, &bytes_copied); - goto copy_getaddrs; - } - } - - buf = addrs; - /* Protection on the bound address list is not needed since - * in the socket option context we hold a socket lock and - * thus the bound address list can't change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - memcpy(&temp, &addr->a, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(buf, &temp, addrlen); - buf += addrlen; - bytes_copied += addrlen; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - -copy_getaddrs: - /* copy the entire address list into the user provided space */ - if (copy_to_user(to, addrs, bytes_copied)) { - err = -EFAULT; - goto error; - } - - /* copy the leading structure back to user */ - getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, len)) - err = -EFAULT; - -error: - kfree(addrs); - return err; -} static int sctp_getsockopt_local_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -5602,22 +5293,6 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM_OLD: - retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_NUM_OLD: - retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_PEER_ADDRS_OLD: - retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_OLD: - retval = sctp_getsockopt_local_addrs_old(sk, len, optval, - optlen); - break; case SCTP_GET_PEER_ADDRS: retval = sctp_getsockopt_peer_addrs(sk, len, optval, optlen); -- cgit v1.2.3-59-g8ed1b From 245cba7e55929dc2b10b7d915bfba0168eeeed17 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: Remove useless last_time_used variable The transport last_time_used variable is rather useless. It was only used when determining if CWND needs to be updated due to idle transport. However, idle transport detection was based on a Heartbeat timer and last_time_used was not incremented when sending Heartbeats. As a result the check for cwnd reduction was always true. We can get rid of the variable and just base our cwnd manipulation on the HB timer (like the code comment sais). We also have to call into the cwnd manipulation function regardless of whether HBs are enabled or not. That way we will detect idle transports if the user has disabled Heartbeats. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 6 ------ net/sctp/output.c | 2 -- net/sctp/sm_statefuns.c | 5 +++-- net/sctp/transport.c | 7 ++----- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 90876b657775..ac794a6823eb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,12 +950,6 @@ struct sctp_transport { /* Source address. */ union sctp_addr saddr; - /* When was the last time(in jiffies) that a data packet was sent on - * this transport? This is used to adjust the cwnd when the transport - * becomes inactive. - */ - unsigned long last_time_used; - /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 5cbda8f1ddfd..9e8e0ea844be 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -557,8 +557,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct timer_list *timer; unsigned long timeout; - tp->last_time_used = jiffies; - /* Restart the AUTOCLOSE timer when sending data. */ if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 16a603527df2..1ef9de9bbae9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -996,14 +996,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; + /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, - SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32faf..2df29cbdaf5a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->fast_recovery = 0; peer->last_time_heard = jiffies; - peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; peer->init_sent_count = 0; @@ -565,10 +564,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if (time_after(jiffies, transport->last_time_used + - transport->rto)) - transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + transport->cwnd = max(transport->cwnd/2, + 4*transport->asoc->pathmtu); break; } -- cgit v1.2.3-59-g8ed1b From a5b03ad2143c5bc9ae76533e8321fe66258b4f35 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:59 -0500 Subject: sctp: Turn the enum socket options into defines Recent attempt to remove deprecated socket options demonstrated that removing options from the enum space will have severe binary compatibility issues. The reason is that it changes the subsequent enum space and causes option values to be redefined. To solve this, and to get rid of the ugly double statements for every option, we simply convert to the #define scheme. Signed-off-by: Vlad Yasevich --- include/net/sctp/user.h | 125 +++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 82 deletions(-) diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index fceab4d2413e..2b2769c5ca9f 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -60,88 +60,49 @@ typedef __s32 sctp_assoc_t; /* The following symbols come from the Sockets API Extensions for * SCTP . */ -enum sctp_optname { - SCTP_RTOINFO, -#define SCTP_RTOINFO SCTP_RTOINFO - SCTP_ASSOCINFO, -#define SCTP_ASSOCINFO SCTP_ASSOCINFO - SCTP_INITMSG, -#define SCTP_INITMSG SCTP_INITMSG - SCTP_NODELAY, /* Get/set nodelay option. */ -#define SCTP_NODELAY SCTP_NODELAY - SCTP_AUTOCLOSE, -#define SCTP_AUTOCLOSE SCTP_AUTOCLOSE - SCTP_SET_PEER_PRIMARY_ADDR, -#define SCTP_SET_PEER_PRIMARY_ADDR SCTP_SET_PEER_PRIMARY_ADDR - SCTP_PRIMARY_ADDR, -#define SCTP_PRIMARY_ADDR SCTP_PRIMARY_ADDR - SCTP_ADAPTATION_LAYER, -#define SCTP_ADAPTATION_LAYER SCTP_ADAPTATION_LAYER - SCTP_DISABLE_FRAGMENTS, -#define SCTP_DISABLE_FRAGMENTS SCTP_DISABLE_FRAGMENTS - SCTP_PEER_ADDR_PARAMS, -#define SCTP_PEER_ADDR_PARAMS SCTP_PEER_ADDR_PARAMS - SCTP_DEFAULT_SEND_PARAM, -#define SCTP_DEFAULT_SEND_PARAM SCTP_DEFAULT_SEND_PARAM - SCTP_EVENTS, -#define SCTP_EVENTS SCTP_EVENTS - SCTP_I_WANT_MAPPED_V4_ADDR, /* Turn on/off mapped v4 addresses */ -#define SCTP_I_WANT_MAPPED_V4_ADDR SCTP_I_WANT_MAPPED_V4_ADDR - SCTP_MAXSEG, /* Get/set maximum fragment. */ -#define SCTP_MAXSEG SCTP_MAXSEG - SCTP_STATUS, -#define SCTP_STATUS SCTP_STATUS - SCTP_GET_PEER_ADDR_INFO, -#define SCTP_GET_PEER_ADDR_INFO SCTP_GET_PEER_ADDR_INFO - SCTP_DELAYED_ACK, -#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK -#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK - SCTP_CONTEXT, /* Receive Context */ -#define SCTP_CONTEXT SCTP_CONTEXT - SCTP_FRAGMENT_INTERLEAVE, -#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE - SCTP_PARTIAL_DELIVERY_POINT, /* Set/Get partial delivery point */ -#define SCTP_PARTIAL_DELIVERY_POINT SCTP_PARTIAL_DELIVERY_POINT - SCTP_MAX_BURST, /* Set/Get max burst */ -#define SCTP_MAX_BURST SCTP_MAX_BURST - SCTP_AUTH_CHUNK, /* Set only: add a chunk type to authenticat */ -#define SCTP_AUTH_CHUNK SCTP_AUTH_CHUNK - SCTP_HMAC_IDENT, -#define SCTP_HMAC_IDENT SCTP_HMAC_IDENT - SCTP_AUTH_KEY, -#define SCTP_AUTH_KEY SCTP_AUTH_KEY - SCTP_AUTH_ACTIVE_KEY, -#define SCTP_AUTH_ACTIVE_KEY SCTP_AUTH_ACTIVE_KEY - SCTP_AUTH_DELETE_KEY, -#define SCTP_AUTH_DELETE_KEY SCTP_AUTH_DELETE_KEY - SCTP_PEER_AUTH_CHUNKS, /* Read only */ -#define SCTP_PEER_AUTH_CHUNKS SCTP_PEER_AUTH_CHUNKS - SCTP_LOCAL_AUTH_CHUNKS, /* Read only */ -#define SCTP_LOCAL_AUTH_CHUNKS SCTP_LOCAL_AUTH_CHUNKS - SCTP_GET_ASSOC_NUMBER, /* Read only */ -#define SCTP_GET_ASSOC_NUMBER SCTP_GET_ASSOC_NUMBER - - - /* Internal Socket Options. Some of the sctp library functions are - * implemented using these socket options. - */ - SCTP_SOCKOPT_BINDX_ADD = 100,/* BINDX requests for adding addresses. */ -#define SCTP_SOCKOPT_BINDX_ADD SCTP_SOCKOPT_BINDX_ADD - SCTP_SOCKOPT_BINDX_REM, /* BINDX requests for removing addresses. */ -#define SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_BINDX_REM - SCTP_SOCKOPT_PEELOFF, /* peel off association. */ -#define SCTP_SOCKOPT_PEELOFF SCTP_SOCKOPT_PEELOFF - SCTP_SOCKOPT_CONNECTX_OLD, /* CONNECTX old requests. */ -#define SCTP_SOCKOPT_CONNECTX_OLD SCTP_SOCKOPT_CONNECTX_OLD - SCTP_GET_PEER_ADDRS, /* Get all peer addresss. */ -#define SCTP_GET_PEER_ADDRS SCTP_GET_PEER_ADDRS - SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ -#define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS - SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ -#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX - SCTP_SOCKOPT_CONNECTX3, /* CONNECTX requests. (new implementation) */ -#define SCTP_SOCKOPT_CONNECTX3 SCTP_SOCKOPT_CONNECTX3 -}; +#define SCTP_RTOINFO 0 +#define SCTP_ASSOCINFO 1 +#define SCTP_INITMSG 2 +#define SCTP_NODELAY 3 /* Get/set nodelay option. */ +#define SCTP_AUTOCLOSE 4 +#define SCTP_SET_PEER_PRIMARY_ADDR 5 +#define SCTP_PRIMARY_ADDR 6 +#define SCTP_ADAPTATION_LAYER 7 +#define SCTP_DISABLE_FRAGMENTS 8 +#define SCTP_PEER_ADDR_PARAMS 9 +#define SCTP_DEFAULT_SEND_PARAM 10 +#define SCTP_EVENTS 11 +#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ +#define SCTP_MAXSEG 13 /* Get/set maximum fragment. */ +#define SCTP_STATUS 14 +#define SCTP_GET_PEER_ADDR_INFO 15 +#define SCTP_DELAYED_ACK_TIME 16 +#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME +#define SCTP_CONTEXT 17 +#define SCTP_FRAGMENT_INTERLEAVE 18 +#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ +#define SCTP_MAX_BURST 20 /* Set/Get max burst */ +#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ +#define SCTP_HMAC_IDENT 22 +#define SCTP_AUTH_KEY 23 +#define SCTP_AUTH_ACTIVE_KEY 24 +#define SCTP_AUTH_DELETE_KEY 25 +#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ +#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ +#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ + +/* Internal Socket Options. Some of the sctp library functions are + * implemented using these socket options. + */ +#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ +#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ +#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ +/* Options 104-106 are deprecated and removed. Do not use this space */ +#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ +#define SCTP_GET_PEER_ADDRS 108 /* Get all peer addresss. */ +#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local addresss. */ +#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ /* * 5.2.1 SCTP Initiation Structure (SCTP_INIT) -- cgit v1.2.3-59-g8ed1b From 46d5a808558181e03a4760d2188cc9879445738a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Update max.burst implementation Current implementation of max.burst ends up limiting new data during cwnd decay period. The decay is happening becuase the connection is idle and we are allowed to fill the congestion window. The point of max.burst is to limit micro-bursts in response to large acks. This still happens, as max.burst is still applied to each transmit opportunity. It will also apply if a very large send is made (greater then allowed by burst). Tested-by: Florian Niederbacher Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 4 ++++ net/sctp/associola.c | 1 + net/sctp/output.c | 23 ----------------------- net/sctp/outqueue.c | 15 +++++++++++++++ net/sctp/transport.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 23 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ac794a6823eb..bc3f8d879c5c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -942,6 +942,8 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; + __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + /* TSN marking the fast recovery exit point */ __u32 fast_recovery_exit; @@ -1070,6 +1072,8 @@ void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_burst_limited(struct sctp_transport *); +void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sctp_transport *, u32); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37e982510bea..880dae2ca87b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -738,6 +738,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; + peer->burst_limited = 0; /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; diff --git a/net/sctp/output.c b/net/sctp/output.c index 9e8e0ea844be..b210d2077e28 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -615,7 +615,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; - __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; @@ -648,28 +647,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, } } - /* sctpimpguide-05 2.14.2 - * D) When the time comes for the sender to - * transmit new DATA chunks, the protocol parameter Max.Burst MUST - * first be applied to limit how many new DATA chunks may be sent. - * The limit is applied by adjusting cwnd as follows: - * if ((flightsize + Max.Burst * MTU) < cwnd) - * cwnd = flightsize + Max.Burst * MTU - */ - max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = flight_size + max_burst_bytes; - SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " - "transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, " - "pba: %d\n", - __func__, transport, - transport->cwnd, - transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - } - /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5732661c87d3..2f2377369e2b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -931,6 +931,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) goto sctp_flush_out; } + /* Apply Max.Burst limitation to the current transport in + * case it will be used for new data. We are going to + * rest it before we return, but we want to apply the limit + * to the currently queued data. + */ + if (transport) + sctp_transport_burst_limited(transport); + /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid @@ -976,6 +984,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) packet = &transport->packet; sctp_packet_config(packet, vtag, asoc->peer.ecn_capable); + /* We've switched transports, so apply the + * Burst limit to the new transport. + */ + sctp_transport_burst_limited(transport); } SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", @@ -1070,6 +1082,9 @@ sctp_flush_out: packet = &t->packet; if (!sctp_packet_empty(packet)) error = sctp_packet_transmit(packet); + + /* Clear the burst limited state, if any */ + sctp_transport_burst_reset(t); } return error; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2df29cbdaf5a..9a6cb22d129f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -576,6 +576,43 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->cwnd, transport->ssthresh); } +/* Apply Max.Burst limit to the congestion window: + * sctpimpguide-05 2.14.2 + * D) When the time comes for the sender to + * transmit new DATA chunks, the protocol parameter Max.Burst MUST + * first be applied to limit how many new DATA chunks may be sent. + * The limit is applied by adjusting cwnd as follows: + * if ((flightsize+ Max.Burst * MTU) < cwnd) + * cwnd = flightsize + Max.Burst * MTU + */ + +void sctp_transport_burst_limited(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + u32 old_cwnd = t->cwnd; + u32 max_burst_bytes; + + if (t->burst_limited) + return; + + max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); + if (max_burst_bytes < old_cwnd) { + t->cwnd = max_burst_bytes; + t->burst_limited = old_cwnd; + } +} + +/* Restore the old cwnd congestion window, after the burst had it's + * desired effect. + */ +void sctp_transport_burst_reset(struct sctp_transport *t) +{ + if (t->burst_limited) { + t->cwnd = t->burst_limited; + t->burst_limited = 0; + } +} + /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *t) { @@ -598,6 +635,7 @@ void sctp_transport_reset(struct sctp_transport *t) * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->last_rto = t->rto = asoc->rto_initial; t->rtt = 0; -- cgit v1.2.3-59-g8ed1b From d8dd15781dd621c5ceab79083f4c5112787863f5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Fix mis-ordering of user space data when multihoming in use Recently had a bug reported to me, in which the user was sending packets with a payload containing a sequence number. The packets were getting delivered in order according the chunk TSN values, but the sequence values in the payload were arriving out of order. At first I thought it must be an application error, but we eventually found it to be a problem on the transmit side in the sctp stack. The conditions for the error are that multihoming must be in use, and it helps if each transport has a different pmtu. The problem occurs in sctp_outq_flush. Basically we dequeue packets from the data queue, and attempt to append them to the orrered packet for a given transport. After we append a data chunk we add the trasport to the end of a list of transports to have their packets sent at the end of sctp_outq_flush. The problem occurs when a data chunks fills up a offered packet on a transport. The function that does the appending (sctp_packet_transmit_chunk), will try to call sctp_packet_transmit on the full packet, and then append the chunk to a new packet. This call to sctp_packet_transmit, sends that packet ahead of the others that may be queued in the transport_list in sctp_outq_flush. The result is that frames that were sent in one order from the user space sending application get re-ordered prior to tsn assignment in sctp_packet_transmit, resulting in mis-sequencing of data payloads, even though tsn ordering is correct. The fix is to change where we assign a tsn. By doing this earlier, we are then free to place chunks in packets, whatever way we see fit and the protocol will make sure to do all the appropriate re-ordering on receive as is needed. Signed-off-by: Neil Horman Reported-by: William Reich Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index b210d2077e28..7c5589363433 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,23 +429,22 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { - if (!chunk->has_tsn) { - sctp_chunk_assign_ssn(chunk); - sctp_chunk_assign_tsn(chunk); - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ + if (!chunk->resent) { + + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ if (!tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } - } else - chunk->resent = 1; + } + + chunk->resent = 1; has_data = 1; } @@ -722,6 +721,8 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Has been accepted for transmission. */ if (!asoc->peer.prsctp_capable) chunk->msg->can_abandon = 0; + sctp_chunk_assign_tsn(chunk); + sctp_chunk_assign_ssn(chunk); } static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, -- cgit v1.2.3-59-g8ed1b From f6778aab6ccc4b510b4dcfa770d9949b696b4545 Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: limit maximum autoclose setsockopt value To avoid overflowing the maximum timer interval when transforming the autoclose interval from seconds to jiffies, limit the maximum autoclose value to MAX_SCHEDULE_TIMEOUT/HZ. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d2681a6bc6fa..71513b3926a5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2086,6 +2086,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ + if (sp->autoclose > (MAX_SCHEDULE_TIMEOUT / HZ) ) + sp->autoclose = MAX_SCHEDULE_TIMEOUT / HZ ; return 0; } -- cgit v1.2.3-59-g8ed1b From da85b7396f3b6cb3fea7d77091498bfa1051ef7c Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: fix integer overflow when setting the autoclose timer When setting the autoclose timeout in jiffies there is a possible integer overflow if the value in seconds is very large (e.g. for 2^22 s with HZ=1024). The problem appears even on 64-bit due to the integer promotion rules. The fix is just a cast to unsigned long. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 880dae2ca87b..6e96f83570c9 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -167,7 +167,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - sp->autoclose * HZ; + (unsigned long)sp->autoclose * HZ; /* Initilizes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) -- cgit v1.2.3-59-g8ed1b From 4814326b59db0cfd18ac652626d955ad3f57fb0f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: prevent too-fast association id reuse We use the idr subsystem and always ask for an id at or above 1. This results in a id reuse when one association is terminated while another is created. To prevent re-use, we keep track of the last id returned and ask for that id + 1 as a base for each query. We let the idr spin lock protect this base id as well. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6e96f83570c9..df5abbff63e2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -63,6 +63,12 @@ static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); +/* Keep track of the new idr low so that we don't re-use association id + * numbers too fast. It is protected by they idr spin lock is in the + * range of 1 - INT_MAX. + */ +static u32 idr_low = 1; + /* 1st Level Abstractions. */ @@ -1553,7 +1559,12 @@ retry: spin_lock_bh(&sctp_assocs_id_lock); error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - 1, &assoc_id); + idr_low, &assoc_id); + if (!error) { + idr_low = assoc_id + 1; + if (idr_low == INT_MAX) + idr_low = 1; + } spin_unlock_bh(&sctp_assocs_id_lock); if (error == -EAGAIN) goto retry; -- cgit v1.2.3-59-g8ed1b