diff options
Diffstat (limited to 'net/netfilter/nf_conntrack_proto_tcp.c')
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 321 |
1 files changed, 201 insertions, 120 deletions
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a634c72b1ffc..656631083177 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -47,6 +47,12 @@ static const char *const tcp_conntrack_names[] = { "SYN_SENT2", }; +enum nf_ct_tcp_action { + NFCT_TCP_IGNORE, + NFCT_TCP_INVALID, + NFCT_TCP_ACCEPT, +}; + #define SECS * HZ #define MINS * 60 SECS #define HOURS * 60 MINS @@ -472,23 +478,45 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, } } -static bool tcp_in_window(struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned int index, - const struct sk_buff *skb, - unsigned int dataoff, - const struct tcphdr *tcph, - const struct nf_hook_state *hook_state) +__printf(6, 7) +static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const struct nf_hook_state *state, + const struct ip_ct_tcp_state *sender, + enum nf_ct_tcp_action ret, + const char *fmt, ...) +{ + const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct)); + struct va_format vaf; + va_list args; + bool be_liberal; + + be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal; + if (be_liberal) + return NFCT_TCP_ACCEPT; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf); + va_end(args); + + return ret; +} + +static enum nf_ct_tcp_action +tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, + unsigned int index, const struct sk_buff *skb, + unsigned int dataoff, const struct tcphdr *tcph, + const struct nf_hook_state *hook_state) { struct ip_ct_tcp *state = &ct->proto.tcp; - struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; __u32 seq, ack, sack, end, win, swin; - u16 win_raw; + bool in_recv_win, seq_ok; s32 receiver_offset; - bool res, in_recv_win; + u16 win_raw; /* * Get the required data from the packet. @@ -517,7 +545,7 @@ static bool tcp_in_window(struct nf_conn *ct, end, win); if (!tcph->ack) /* Simultaneous open */ - return true; + return NFCT_TCP_ACCEPT; } else { /* * We are in the middle of a connection, @@ -560,7 +588,7 @@ static bool tcp_in_window(struct nf_conn *ct, end, win); if (dir == IP_CT_DIR_REPLY && !tcph->ack) - return true; + return NFCT_TCP_ACCEPT; } if (!(tcph->ack)) { @@ -584,122 +612,166 @@ static bool tcp_in_window(struct nf_conn *ct, */ seq = end = sender->td_end; - /* Is the ending sequence in the receive window (if available)? */ - in_recv_win = !receiver->td_maxwin || - after(end, sender->td_end - receiver->td_maxwin - 1); - - if (before(seq, sender->td_maxend + 1) && - in_recv_win && - before(sack, receiver->td_end + 1) && - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { - /* - * Take into account window scaling (RFC 1323). - */ - if (!tcph->syn) - win <<= sender->td_scale; - - /* - * Update sender data. - */ - swin = win + (sack - ack); - if (sender->td_maxwin < swin) - sender->td_maxwin = swin; - if (after(end, sender->td_end)) { + seq_ok = before(seq, sender->td_maxend + 1); + if (!seq_ok) { + u32 overshot = end - sender->td_maxend + 1; + bool ack_ok; + + ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); + in_recv_win = receiver->td_maxwin && + after(end, sender->td_end - receiver->td_maxwin - 1); + + if (in_recv_win && + ack_ok && + overshot <= receiver->td_maxwin && + before(sack, receiver->td_end + 1)) { + /* Work around TCPs that send more bytes than allowed by + * the receive window. + * + * If the (marked as invalid) packet is allowed to pass by + * the ruleset and the peer acks this data, then its possible + * all future packets will trigger 'ACK is over upper bound' check. + * + * Thus if only the sequence check fails then do update td_end so + * possible ACK for this data can update internal state. + */ sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; - } - if (tcph->ack) { - if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { - sender->td_maxack = ack; - sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; - } else if (after(ack, sender->td_maxack)) - sender->td_maxack = ack; + + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "%u bytes more than expected", overshot); } - /* - * Update receiver data. - */ - if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) - receiver->td_maxwin += end - sender->td_maxend; - if (after(sack + win, receiver->td_maxend - 1)) { - receiver->td_maxend = sack + win; - if (win == 0) - receiver->td_maxend++; + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, + "SEQ is over upper bound %u (over the window of the receiver)", + sender->td_maxend + 1); + } + + if (!before(sack, receiver->td_end + 1)) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, + "ACK is over upper bound %u (ACKed data not seen yet)", + receiver->td_end + 1); + + /* Is the ending sequence in the receive window (if available)? */ + in_recv_win = !receiver->td_maxwin || + after(end, sender->td_end - receiver->td_maxwin - 1); + if (!in_recv_win) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "SEQ is under lower bound %u (already ACKed data retransmitted)", + sender->td_end - receiver->td_maxwin - 1); + if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "ignored ACK under lower bound %u (possible overly delayed)", + receiver->td_end - MAXACKWINDOW(sender) - 1); + + /* Take into account window scaling (RFC 1323). */ + if (!tcph->syn) + win <<= sender->td_scale; + + /* Update sender data. */ + swin = win + (sack - ack); + if (sender->td_maxwin < swin) + sender->td_maxwin = swin; + if (after(end, sender->td_end)) { + sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) { + sender->td_maxack = ack; } - if (ack == receiver->td_end) - receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } - /* - * Check retransmissions. - */ - if (index == TCP_ACK_SET) { - if (state->last_dir == dir - && state->last_seq == seq - && state->last_ack == ack - && state->last_end == end - && state->last_win == win_raw) - state->retrans++; - else { - state->last_dir = dir; - state->last_seq = seq; - state->last_ack = ack; - state->last_end = end; - state->last_win = win_raw; - state->retrans = 0; - } + /* Update receiver data. */ + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) + receiver->td_maxwin += end - sender->td_maxend; + if (after(sack + win, receiver->td_maxend - 1)) { + receiver->td_maxend = sack + win; + if (win == 0) + receiver->td_maxend++; + } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + + /* Check retransmissions. */ + if (index == TCP_ACK_SET) { + if (state->last_dir == dir && + state->last_seq == seq && + state->last_ack == ack && + state->last_end == end && + state->last_win == win_raw) { + state->retrans++; + } else { + state->last_dir = dir; + state->last_seq = seq; + state->last_ack = ack; + state->last_end = end; + state->last_win = win_raw; + state->retrans = 0; } - res = true; - } else { - res = false; - if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || - tn->tcp_be_liberal) - res = true; - if (!res) { - bool seq_ok = before(seq, sender->td_maxend + 1); - - if (!seq_ok) { - u32 overshot = end - sender->td_maxend + 1; - bool ack_ok; - - ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); - - if (in_recv_win && - ack_ok && - overshot <= receiver->td_maxwin && - before(sack, receiver->td_end + 1)) { - /* Work around TCPs that send more bytes than allowed by - * the receive window. - * - * If the (marked as invalid) packet is allowed to pass by - * the ruleset and the peer acks this data, then its possible - * all future packets will trigger 'ACK is over upper bound' check. - * - * Thus if only the sequence check fails then do update td_end so - * possible ACK for this data can update internal state. - */ - sender->td_end = end; - sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; - - nf_ct_l4proto_log_invalid(skb, ct, hook_state, - "%u bytes more than expected", overshot); - return res; - } - } + } + + return NFCT_TCP_ACCEPT; +} + +static void __cold nf_tcp_handle_invalid(struct nf_conn *ct, + enum ip_conntrack_dir dir, + int index, + const struct sk_buff *skb, + const struct nf_hook_state *hook_state) +{ + const unsigned int *timeouts; + const struct nf_tcp_net *tn; + unsigned int timeout; + u32 expires; + + if (!test_bit(IPS_ASSURED_BIT, &ct->status) || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) + return; + + /* We don't want to have connections hanging around in ESTABLISHED + * state for long time 'just because' conntrack deemed a FIN/RST + * out-of-window. + * + * Shrink the timeout just like when there is unacked data. + * This speeds up eviction of 'dead' connections where the + * connection and conntracks internal state are out of sync. + */ + switch (index) { + case TCP_RST_SET: + case TCP_FIN_SET: + break; + default: + return; + } + + if (ct->proto.tcp.last_dir != dir && + (ct->proto.tcp.last_index == TCP_FIN_SET || + ct->proto.tcp.last_index == TCP_RST_SET)) { + expires = nf_ct_expires(ct); + if (expires < 120 * HZ) + return; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) + timeouts = tn->timeouts; + timeout = READ_ONCE(timeouts[TCP_CONNTRACK_UNACK]); + if (expires > timeout) { nf_ct_l4proto_log_invalid(skb, ct, hook_state, - "%s", - before(seq, sender->td_maxend + 1) ? - in_recv_win ? - before(sack, receiver->td_end + 1) ? - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" - : "ACK is under the lower bound (possible overly delayed ACK)" - : "ACK is over the upper bound (ACKed data not seen yet)" - : "SEQ is under the lower bound (already ACKed data retransmitted)" - : "SEQ is over the upper bound (over the window of the receiver)"); + "packet (index %d, dir %d) response for index %d lower timeout to %u", + index, dir, ct->proto.tcp.last_index, timeout); + + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); } + } else { + ct->proto.tcp.last_index = index; + ct->proto.tcp.last_dir = dir; } - - return res; } /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ @@ -861,6 +933,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; + enum nf_ct_tcp_action res; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; @@ -1126,10 +1199,18 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, break; } - if (!tcp_in_window(ct, dir, index, - skb, dataoff, th, state)) { + res = tcp_in_window(ct, dir, index, + skb, dataoff, th, state); + switch (res) { + case NFCT_TCP_IGNORE: + spin_unlock_bh(&ct->lock); + return NF_ACCEPT; + case NFCT_TCP_INVALID: + nf_tcp_handle_invalid(ct, dir, index, skb, state); spin_unlock_bh(&ct->lock); return -NF_ACCEPT; + case NFCT_TCP_ACCEPT: + break; } in_window: /* From now on we have got in-window packets */ |