aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorDaniel Borkmann <dborkman@redhat.com>2014-09-26 22:37:33 +0200
committerDavid S. Miller <davem@davemloft.net>2014-09-29 00:13:10 -0400
commit30e502a34b8b21fae2c789da102bd9f6e99fef83 (patch)
tree6a56bc051b629e3c0914c3a519d201f357e79ada /net/ipv4
parentnet: tcp: assign tcp cong_ops when tcp sk is created (diff)
downloadlinux-dev-30e502a34b8b21fae2c789da102bd9f6e99fef83.tar.xz
linux-dev-30e502a34b8b21fae2c789da102bd9f6e99fef83.zip
net: tcp: add flag for ca to indicate that ECN is required
This patch adds a flag to TCP congestion algorithms that allows for requesting to mark IPv4/IPv6 sockets with transport as ECN capable, that is, ECT(0), when required by a congestion algorithm. It is currently used and needed in DataCenter TCP (DCTCP), as it requires both peers to assert ECT on all IP packets sent - it uses ECN feedback (i.e. CE, Congestion Encountered information) from switches inside the data center to derive feedback to the end hosts. Therefore, simply add a new flag to icsk_ca_ops. Note that DCTCP's algorithm/behaviour slightly diverges from RFC3168, therefore this is only (!) enabled iff the assigned congestion control ops module has requested this. By that, we can tightly couple this logic really only to the provided congestion control ops. Joint work with Florian Westphal and Glenn Judd. Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com> Acked-by: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_output.c25
2 files changed, 19 insertions, 8 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5073eefa6fae..fb0fe97e1c54 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5944,7 +5944,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb, sock_net(sk));
+ TCP_ECN_create_request(req, skb, sk);
if (want_cookie) {
isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4d92703df4c6..20e73271d75c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -318,11 +318,15 @@ static u16 tcp_select_window(struct sock *sk)
}
/* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(struct sock *sk, struct sk_buff *skb)
{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+ else if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
}
/* Packet ECN state for a SYN. */
@@ -331,17 +335,24 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
tp->ecn_flags = 0;
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ tcp_ca_needs_ecn(sk)) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
+ if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
}
}
static __inline__ void
-TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
+TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th,
+ struct sock *sk)
{
- if (inet_rsk(req)->ecn_ok)
+ if (inet_rsk(req)->ecn_ok) {
th->ece = 1;
+ if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+ }
}
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
@@ -362,7 +373,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
tcp_hdr(skb)->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
- } else {
+ } else if (!tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}
@@ -2789,7 +2800,7 @@ int tcp_send_synack(struct sock *sk)
}
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
- TCP_ECN_send_synack(tcp_sk(sk), skb);
+ TCP_ECN_send_synack(sk, skb);
}
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
@@ -2848,7 +2859,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- TCP_ECN_make_synack(req, th);
+ TCP_ECN_make_synack(req, th, sk);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is