aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorHannes Frederic Sowa <hannes@stressinduktion.org>2013-10-27 17:29:11 +0100
committerDavid S. Miller <davem@davemloft.net>2013-10-29 00:15:22 -0400
commitdaba287b299ec7a2c61ae3a714920e90e8396ad5 (patch)
tree388feed8fdf3b531da0d365a06d36ae6fcd5bdab /net
parentvirtio_net: migrate mergeable rx buffers to page frag allocators (diff)
downloadlinux-dev-daba287b299ec7a2c61ae3a714920e90e8396ad5.tar.xz
linux-dev-daba287b299ec7a2c61ae3a714920e90e8396ad5.zip
ipv4: fix DO and PROBE pmtu mode regarding local fragmentation with UFO/CORK
UFO as well as UDP_CORK do not respect IP_PMTUDISC_DO and IP_PMTUDISC_PROBE well enough. UFO enabled packet delivery just appends all frags to the cork and hands it over to the network card. So we just deliver non-DF udp fragments (DF-flag may get overwritten by hardware or virtual UFO enabled interface). UDP_CORK does enqueue the data until the cork is disengaged. At this point it sets the correct IP_DF and local_df flags and hands it over to ip_fragment which in this case will generate an icmp error which gets appended to the error socket queue. This is not reflected in the syscall error (of course, if UFO is enabled this also won't happen). Improve this by checking the pmtudisc flags before appending data to the socket and if we still can fit all data in one packet when IP_PMTUDISC_DO or IP_PMTUDISC_PROBE is set, only then proceed. We use (mtu-fragheaderlen) to check for the maximum length because we ensure not to generate a fragment and non-fragmented data does not need to have its length aligned on 64 bit boundaries. Also the passed in ip_options are already aligned correctly. Maybe, we can relax some other checks around ip_fragment. This needs more research. Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/ip_output.c12
1 files changed, 8 insertions, 4 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8fbac7de1e1b..51be64e18e32 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -810,7 +810,7 @@ static int __ip_append_data(struct sock *sk,
int copy;
int err;
int offset = 0;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
@@ -823,8 +823,10 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+ maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
+ mtu : 0xFFFF;
- if (cork->length + length > 0xFFFF - fragheaderlen) {
+ if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
mtu-exthdrlen);
return -EMSGSIZE;
@@ -1122,7 +1124,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
int mtu;
int len;
int err;
- unsigned int maxfraglen, fragheaderlen, fraggap;
+ unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize;
if (inet->hdrincl)
return -EPERM;
@@ -1146,8 +1148,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+ maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
+ mtu : 0xFFFF;
- if (cork->length + size > 0xFFFF - fragheaderlen) {
+ if (cork->length + size > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
return -EMSGSIZE;
}