From f03d78db65085609938fdb686238867e65003181 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jul 2011 00:27:05 -0700 Subject: net: refine {udp|tcp|sctp}_mem limits Current tcp/udp/sctp global memory limits are not taking into account hugepages allocations, and allow 50% of ram to be used by buffers of a single protocol [ not counting space used by sockets / inodes ...] Lets use nr_free_buffer_pages() and allow a default of 1/8 of kernel ram per protocol, and a minimum of 128 pages. Heavy duty machines sysadmins probably need to tweak limits anyway. References: https://bugzilla.stlinux.com/show_bug.cgi?id=38032 Reported-by: starlight Suggested-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 67380a29e2e9..207175b2f40a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1058,7 +1058,6 @@ SCTP_STATIC __init int sctp_init(void) int status = -EINVAL; unsigned long goal; unsigned long limit; - unsigned long nr_pages; int max_share; int order; @@ -1148,15 +1147,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); - /* Set the pressure threshold to be a fraction of global memory that - * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. - * Note this initializes the data in sctpv6_prot too - * Unabashedly stolen from tcp_init - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_sctp_mem[0] = limit / 4 * 3; sysctl_sctp_mem[1] = limit; -- cgit v1.2.3-59-g8ed1b From 949123016a2ef578009b6aa3e98d45d1a154ebfb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 2 Jul 2011 09:28:04 +0000 Subject: sctp: fix missing send up SCTP_SENDER_DRY_EVENT when subscribe it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to send up SCTP_SENDER_DRY_EVENT notification when user app subscribes to this event, and there is no data to be sent or retransmit. This is required by the Socket API and used by the DTLS/SCTP implementation. Reported-by: Michael Tüxen Signed-off-by: Wei Yongjun Tested-by: Robin Seggelmann Signed-off-by: David S. Miller --- net/sctp/socket.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6766913a53e6..08c6238802de 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2073,10 +2073,33 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk, static int sctp_setsockopt_events(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_association *asoc; + struct sctp_ulpevent *event; + if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; + + /* + * At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, + * if there is no data to be sent or retransmit, the stack will + * immediately send up this notification. + */ + if (sctp_ulpevent_type_enabled(SCTP_SENDER_DRY_EVENT, + &sctp_sk(sk)->subscribe)) { + asoc = sctp_id2assoc(sk, 0); + + if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_ATOMIC); + if (!event) + return -ENOMEM; + + sctp_ulpq_tail_event(&asoc->ulpq, event); + } + } + return 0; } -- cgit v1.2.3-59-g8ed1b From f8d9605243280f1870dd2c6c37a735b925c15f3c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 7 Jul 2011 00:28:35 +0000 Subject: sctp: Enforce retransmission limit during shutdown When initiating a graceful shutdown while having data chunks on the retransmission queue with a peer which is in zero window mode the shutdown is never completed because the retransmission error count is reset periodically by the following two rules: - Do not timeout association while doing zero window probe. - Reset overall error count when a heartbeat request has been acknowledged. The graceful shutdown will wait for all outstanding TSN to be acknowledged before sending the SHUTDOWN request. This never happens due to the peer's zero window not acknowledging the continuously retransmitted data chunks. Although the error counter is incremented for each failed retransmission, the receiving of the SACK announcing the zero window clears the error count again immediately. Also heartbeat requests continue to be sent periodically. The peer acknowledges these requests causing the error counter to be reset as well. This patch changes behaviour to only reset the overall error counter for the above rules while not in shutdown. After reaching the maximum number of retransmission attempts, the T5 shutdown guard timer is scheduled to give the receiver some additional time to recover. The timer is stopped as soon as the receiver acknowledges any data. The issue can be easily reproduced by establishing a sctp association over the loopback device, constantly queueing data at the sender while not reading any at the receiver. Wait for the window to reach zero, then initiate a shutdown by killing both processes simultaneously. The association will never be freed and the chunks on the retransmission queue will be retransmitted indefinitely. Signed-off-by: Thomas Graf Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + net/sctp/outqueue.c | 20 +++++++++++++++++++- net/sctp/sm_sideeffect.c | 20 ++++++++++++++++++-- net/sctp/sm_statefuns.c | 32 +++++++++++++++++++++++--------- net/sctp/sm_statetable.c | 2 +- 5 files changed, 62 insertions(+), 13 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index dd6847e5d6e4..6506458ccd33 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -63,6 +63,7 @@ typedef enum { SCTP_CMD_ECN_ECNE, /* Do delayed ECNE processing. */ SCTP_CMD_ECN_CWR, /* Do delayed CWR processing. */ SCTP_CMD_TIMER_START, /* Start a timer. */ + SCTP_CMD_TIMER_START_ONCE, /* Start a timer once */ SCTP_CMD_TIMER_RESTART, /* Restart a timer. */ SCTP_CMD_TIMER_STOP, /* Stop a timer. */ SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1c88c8911dc5..d03682109b7a 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1582,6 +1582,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, #endif /* SCTP_DEBUG */ if (transport) { if (bytes_acked) { + struct sctp_association *asoc = transport->asoc; + /* We may have counted DATA that was migrated * to this transport due to DEL-IP operation. * Subtract those bytes, since the were never @@ -1600,6 +1602,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, transport->error_count = 0; transport->asoc->overall_error_count = 0; + /* + * While in SHUTDOWN PENDING, we may have started + * the T5 shutdown guard timer after reaching the + * retransmission limit. Stop that timer as soon + * as the receiver acknowledged any data. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING && + del_timer(&asoc->timers + [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD])) + sctp_association_put(asoc); + /* Mark the destination transport address as * active if it is not so marked. */ @@ -1629,10 +1642,15 @@ static void sctp_check_transmitted(struct sctp_outq *q, * A sender is doing zero window probing when the * receiver's advertised window is zero, and there is * only one data chunk in flight to the receiver. + * + * Allow the association to timeout while in SHUTDOWN + * PENDING or SHUTDOWN RECEIVED in case the receiver + * stays in zero window mode forever. */ if (!q->asoc->peer.rwnd && !list_empty(&tlist) && - (sack_ctsn+2 == q->asoc->next_tsn)) { + (sack_ctsn+2 == q->asoc->next_tsn) && + q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { SCTP_DEBUG_PRINTK("%s: SACK received for zero " "window probe: %u\n", __func__, sack_ctsn); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 534c2e5feb05..6e0f88295aaf 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -670,10 +670,19 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the * HEARTBEAT should clear the error counter of the destination * transport address to which the HEARTBEAT was sent. - * The association's overall error count is also cleared. */ t->error_count = 0; - t->asoc->overall_error_count = 0; + + /* + * Although RFC4960 specifies that the overall error count must + * be cleared when a HEARTBEAT ACK is received, we make an + * exception while in SHUTDOWN PENDING. If the peer keeps its + * window shut forever, we may never be able to transmit our + * outstanding data and rely on the retransmission limit be reached + * to shutdown the association. + */ + if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING) + t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good * acknowledgement. @@ -1437,6 +1446,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); break; + case SCTP_CMD_TIMER_START_ONCE: + timer = &asoc->timers[cmd->obj.to]; + + if (timer_pending(timer)) + break; + /* fall through */ + case SCTP_CMD_TIMER_START: timer = &asoc->timers[cmd->obj.to]; timeout = asoc->timeouts[cmd->obj.to]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a297283154d5..246117142b5c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5154,7 +5154,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( * The sender of the SHUTDOWN MAY also start an overall guard timer * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); if (asoc->autoclose) @@ -5299,14 +5299,28 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { - sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, - SCTP_ERROR(ETIMEDOUT)); - /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - return SCTP_DISPOSITION_DELETE_TCB; + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { + /* + * We are here likely because the receiver had its rwnd + * closed for a while and we have not been able to + * transmit the locally queued data within the maximum + * retransmission attempts limit. Start the T5 + * shutdown guard timer to give the receiver one last + * chance and some additional time to recover before + * aborting. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE, + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + } else { + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ETIMEDOUT)); + /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_NO_ERROR)); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + return SCTP_DISPOSITION_DELETE_TCB; + } } /* E1) For the destination address for which the timer diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 0338dc6fdc9d..7c211a7f90f4 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -827,7 +827,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ -- cgit v1.2.3-59-g8ed1b From cd4fcc704f30f2064ab30b5300d44d431e46db50 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 8 Jul 2011 04:37:46 +0000 Subject: sctp: ABORT if receive, reassmbly, or reodering queue is not empty while closing socket Trigger user ABORT if application closes a socket which has data queued on the socket receive queue or chunks waiting on the reassembly or ordering queue as this would imply data being lost which defeats the point of a graceful shutdown. This behavior is already practiced in TCP. We do not check the input queue because that would mean to parse all chunks on it to look for unacknowledged data which seems too much of an effort. Control chunks or duplicated chunks may also be in the input queue and should not be stopping a graceful shutdown. Signed-off-by: Thomas Graf Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 2 +- net/sctp/socket.c | 13 ++++++++----- net/sctp/ulpevent.c | 16 +++++++++++++--- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 99b027b2adce..ca4693b4e09e 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -80,7 +80,7 @@ static inline struct sctp_ulpevent *sctp_skb2event(struct sk_buff *skb) void sctp_ulpevent_free(struct sctp_ulpevent *); int sctp_ulpevent_is_notification(const struct sctp_ulpevent *); -void sctp_queue_purge_ulpevents(struct sk_buff_head *list); +unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list); struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( const struct sctp_association *asoc, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 08c6238802de..d3ccf7973c59 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1384,6 +1384,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; + unsigned int data_was_unread; SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); @@ -1393,6 +1394,10 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) ep = sctp_sk(sk)->ep; + /* Clean up any skbs sitting on the receive queue. */ + data_was_unread = sctp_queue_purge_ulpevents(&sk->sk_receive_queue); + data_was_unread += sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby); + /* Walk all associations on an endpoint. */ list_for_each_safe(pos, temp, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); @@ -1410,7 +1415,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) } } - if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || + !skb_queue_empty(&asoc->ulpq.reasm) || + (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); @@ -1420,10 +1427,6 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) sctp_primitive_SHUTDOWN(asoc, NULL); } - /* Clean up any skbs sitting on the receive queue. */ - sctp_queue_purge_ulpevents(&sk->sk_receive_queue); - sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby); - /* On a TCP-style socket, block for at most linger_time if set. */ if (sctp_style(sk, TCP) && timeout) sctp_wait_for_close(sk, timeout); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e70e5fc87890..8a84017834c2 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -1081,9 +1081,19 @@ void sctp_ulpevent_free(struct sctp_ulpevent *event) } /* Purge the skb lists holding ulpevents. */ -void sctp_queue_purge_ulpevents(struct sk_buff_head *list) +unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list) { struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) - sctp_ulpevent_free(sctp_skb2event(skb)); + unsigned int data_unread = 0; + + while ((skb = skb_dequeue(list)) != NULL) { + struct sctp_ulpevent *event = sctp_skb2event(skb); + + if (!sctp_ulpevent_is_notification(event)) + data_unread += skb->len; + + sctp_ulpevent_free(event); + } + + return data_unread; } -- cgit v1.2.3-59-g8ed1b