aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_input.c
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2019-11-20 13:47:37 +0100
committerDavid S. Miller <davem@davemloft.net>2019-11-21 14:45:55 -0800
commit02b24941619fcce3d280311ac73b1e461552e9c8 (patch)
treee034d97436dfa8b32deff124f3ab172648aef9e4 /net/ipv4/ip_input.c
parentipv4: move fib4_has_custom_rules() helper to public header (diff)
downloadlinux-dev-02b24941619fcce3d280311ac73b1e461552e9c8.tar.xz
linux-dev-02b24941619fcce3d280311ac73b1e461552e9c8.zip
ipv4: use dst hint for ipv4 list receive
This is similar to the previous change, with some additional IPv4-specific quirks. Even when using the route hint we still have to perform additional per-packet checks on source address validity: a new helper is added to wrap them. Hints are explicitly disabled if the destination is a local broadcast; that keeps the code simple, and local broadcasts are a slower path anyway. UDP flood performance vs recvmmsg() receiver: vanilla 1683 Kpps, patched 1871 Kpps, delta +11%. In the worst-case scenario - each packet has a different destination address - the performance delta is within noise range. v3 -> v4: - re-enable hints for forward v2 -> v3: - really fix build (sic) and hint usage check - use fib4_has_custom_rules() helpers (David A.) - add ip_extract_route_hint() helper (Edward C.) - use prev skb as hint instead of copying data (Willem) v1 -> v2: - fix build issue with !CONFIG_IP_MULTIPLE_TABLES Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to '')
-rw-r--r--net/ipv4/ip_input.c35
1 files changed, 31 insertions, 4 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 24a95126e698..aa438c6758a7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -302,16 +302,31 @@ drop:
return true;
}
+static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
+ const struct sk_buff *hint)
+{
+ return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
+ ip_hdr(hint)->tos == iph->tos;
+}
+
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
- struct sk_buff *skb, struct net_device *dev)
+ struct sk_buff *skb, struct net_device *dev,
+ const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
struct rtable *rt;
int err;
+ if (ip_can_use_hint(skb, iph, hint)) {
+ err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
+ dev, hint);
+ if (unlikely(err))
+ goto drop_error;
+ }
+
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
@@ -408,7 +423,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!skb)
return NET_RX_SUCCESS;
- ret = ip_rcv_finish_core(net, sk, skb, dev);
+ ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
if (ret != NET_RX_DROP)
ret = dst_input(skb);
return ret;
@@ -535,11 +550,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
}
}
+static struct sk_buff *ip_extract_route_hint(const struct net *net,
+ struct sk_buff *skb, int rt_type)
+{
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ return NULL;
+
+ return skb;
+}
+
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
+ struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
@@ -554,11 +578,14 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip_rcv(skb);
if (!skb)
continue;
- if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
+ if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
continue;
dst = skb_dst(skb);
if (curr_dst != dst) {
+ hint = ip_extract_route_hint(net, skb,
+ ((struct rtable *)dst)->rt_type);
+
/* dispatch old sublist */
if (!list_empty(&sublist))
ip_sublist_rcv_finish(&sublist);