aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_fragment.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-05-27 13:03:31 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-27 13:03:31 -0400
commit837b9955b1dd22719f0c7dcf321690f5392d99c3 (patch)
tree7e5f00b4c156f9549b8b3eeea92ce2a12dd1f0da /net/ipv4/ip_fragment.c
parentMerge branch 'phy_rgmii' (diff)
parentip_fragment: don't forward defragmented DF packet (diff)
downloadlinux-dev-837b9955b1dd22719f0c7dcf321690f5392d99c3.tar.xz
linux-dev-837b9955b1dd22719f0c7dcf321690f5392d99c3.zip
Merge branch 'ip_frag_next'
Florian Westphal says: ==================== net: force refragmentation for DF reassembed skbs output path tests: if (skb->len > mtu) ip_fragment() This breaks connectivity in one corner case: If the skb was reassembled, but has the DF bit set and .. .. its reassembled size is <= outdev mtu .. .. we will forward a DF packet larger than what the sender transmitted on wire. If a router later in the path can't forward this packet, it will send an icmp error in response to an mtu that the original sender never exceeded. This changes ipv4 defrag/output path to a) force refragmentation for DF reassembled skbs and b) set DF bit on all fragments when refragmenting if it was set on original frags. tested via: from scapy.all import * dip="10.23.42.2" payload="A"*1400 packet=IP(dst=dip,id=12345,flags='DF')/UDP(sport=42,dport=42)/payload frags=fragment(packet,fragsize=1200) for fragment in frags: send(fragment) Without this patch, we generate fragments without df bit set based on the outgoing device mtu when fragmenting after forwarding, ie. IP (ttl 64, id 12345, offset 0, flags [+, DF], proto UDP (17), length 1204) 192.168.7.1.42 > 10.23.42.2.42: UDP, length 1400 IP (ttl 64, id 12345, offset 1184, flags [DF], proto UDP (17), length 244) 192.168.7.1 > 10.23.42.2: ip-proto-17 on ingress will either turn into IP (ttl 63, id 12345, offset 0, flags [+], proto UDP (17), length 1396) 192.168.7.1.42 > 10.23.42.2.42: UDP, length 1400 IP (ttl 63, id 12345, offset 1376, flags [none], proto UDP (17), length 52) (mtu 1400: We strip df and send larger fragment), or IP (ttl 63, id 12345, offset 0, flags [DF], proto UDP (17), length 1428) 192.168.7.1.42 > 10.23.42.2.42: [udp sum ok] UDP, length 1400 if mtu is 1500. And in this case things break; router with a smaller mtu will send icmp error, but original sender only sent packets <= 1204 byte. With patch, we keep intent of such fragments and will emit DF-fragments that won't exceed 1204 byte in size. Joint work with Hannes Frederic Sowa. Changes since v2: - split unrelated patches from series - rework changelog of patch #2 to better illustrate breakage ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r--net/ipv4/ip_fragment.c31
1 files changed, 26 insertions, 5 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 47fa64ee82b1..a50dc6d408d1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -75,6 +75,7 @@ struct ipq {
__be16 id;
u8 protocol;
u8 ecn; /* RFC3168 support */
+ u16 max_df_size; /* largest frag with DF set seen */
int iif;
unsigned int rid;
struct inet_peer *peer;
@@ -326,6 +327,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
struct net_device *dev;
+ unsigned int fragsize;
int flags, offset;
int ihl, end;
int err = -ENOENT;
@@ -481,9 +483,14 @@ found:
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
+ fragsize = skb->len + ihl;
+
+ if (fragsize > qp->q.max_size)
+ qp->q.max_size = fragsize;
+
if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
- skb->len + ihl > qp->q.max_size)
- qp->q.max_size = skb->len + ihl;
+ fragsize > qp->max_df_size)
+ qp->max_df_size = fragsize;
if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
qp->q.meat == qp->q.len) {
@@ -613,13 +620,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->next = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = qp->q.max_size;
+ IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
iph = ip_hdr(head);
- /* max_size != 0 implies at least one fragment had IP_DF set */
- iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
iph->tot_len = htons(len);
iph->tos |= ecn;
+
+ /* When we set IP_DF on a refragmented skb we must also force a
+ * call to ip_fragment to avoid forwarding a DF-skb of size s while
+ * original sender only sent fragments of size f (where f < s).
+ *
+ * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
+ * frag seen to avoid sending tiny DF-fragments in case skb was built
+ * from one very small df-fragment and one large non-df frag.
+ */
+ if (qp->max_df_size == qp->q.max_size) {
+ IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+ iph->frag_off = htons(IP_DF);
+ } else {
+ iph->frag_off = 0;
+ }
+
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
qp->q.fragments_tail = NULL;