aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-10-30 17:21:35 -0700
committerDavid S. Miller <davem@davemloft.net>2019-10-30 17:21:35 -0700
commit29f52875ba99d6002555f60c9a840072660112c2 (patch)
tree96c5cd02ed39c81b67e8e011ba19f1975744cdf9
parenttc-testing: fixed two failing pedit tests (diff)
parentbonding: balance ICMP echoes in layer3+4 mode (diff)
downloadlinux-dev-29f52875ba99d6002555f60c9a840072660112c2.tar.xz
linux-dev-29f52875ba99d6002555f60c9a840072660112c2.zip
Merge branch 'ICMP-flow-improvements'
Matteo Croce says: ==================== ICMP flow improvements This series improves the flow inspector handling of ICMP packets: The first two patches just add some comments in the code which would have saved me a few minutes of time, and refactor a piece of code. The third one adds to the flow inspector the capability to extract the Identifier field, if present, so echo requests and replies are classified as part of the same flow. The fourth patch uses the function introduced earlier to the bonding driver, so echo replies can be balanced across bonding slaves. v1 -> v2: - remove unused struct members - add an helper to check for the Id field - use a local flow_dissector_key in the bonding to avoid changing behaviour of the flow dissector ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/bonding/bond_main.c77
-rw-r--r--include/net/flow_dissector.h20
-rw-r--r--net/core/flow_dissector.c108
3 files changed, 160 insertions, 45 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 21d8fcc83c9c..3e496e746cc6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -200,6 +200,51 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
unsigned int bond_net_id __read_mostly;
+static const struct flow_dissector_key flow_keys_bonding_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_ICMP,
+ .offset = offsetof(struct flow_keys, icmp),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_VLAN,
+ .offset = offsetof(struct flow_keys, vlan),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
+ .offset = offsetof(struct flow_keys, tags),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct flow_keys, keyid),
+ },
+};
+
+static struct flow_dissector flow_keys_bonding __read_mostly;
+
/*-------------------------- Forward declarations ---------------------------*/
static int bond_init(struct net_device *bond_dev);
@@ -3263,10 +3308,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
const struct iphdr *iph;
int noff, proto = -1;
- if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
- return skb_flow_dissect_flow_keys(skb, fk, 0);
+ if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) {
+ memset(fk, 0, sizeof(*fk));
+ return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
+ fk, NULL, 0, 0, 0, 0);
+ }
fk->ports.ports = 0;
+ memset(&fk->icmp, 0, sizeof(fk->icmp));
noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
@@ -3286,8 +3335,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
} else {
return false;
}
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) {
+ if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
+ skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
+ skb_transport_offset(skb),
+ skb_headlen(skb));
+ else
+ fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+ }
return true;
}
@@ -3314,10 +3369,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
return bond_eth_hash(skb);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
- bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+ bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb);
- else
- hash = (__force u32)flow.ports.ports;
+ } else {
+ if (flow.icmp.id)
+ memcpy(&hash, &flow.icmp, sizeof(hash));
+ else
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ }
hash ^= (__force u32)flow_get_u32_dst(&flow) ^
(__force u32)flow_get_u32_src(&flow);
hash ^= (hash >> 16);
@@ -4901,6 +4960,10 @@ static int __init bonding_init(void)
goto err;
}
+ skb_flow_dissector_init(&flow_keys_bonding,
+ flow_keys_bonding_keys,
+ ARRAY_SIZE(flow_keys_bonding_keys));
+
register_netdevice_notifier(&bond_netdev_notifier);
out:
return res;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 90bd210be060..f8541d018848 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -6,6 +6,8 @@
#include <linux/in6.h>
#include <uapi/linux/if_ether.h>
+struct sk_buff;
+
/**
* struct flow_dissector_key_control:
* @thoff: Transport header offset
@@ -156,19 +158,16 @@ struct flow_dissector_key_ports {
/**
* flow_dissector_key_icmp:
- * @ports: type and code of ICMP header
- * icmp: ICMP type (high) and code (low)
* type: ICMP type
* code: ICMP code
+ * id: session identifier
*/
struct flow_dissector_key_icmp {
- union {
- __be16 icmp;
- struct {
- u8 type;
- u8 code;
- };
+ struct {
+ u8 type;
+ u8 code;
};
+ u16 id;
};
/**
@@ -282,6 +281,8 @@ struct flow_keys {
struct flow_dissector_key_vlan cvlan;
struct flow_dissector_key_keyid keyid;
struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_icmp icmp;
+ /* 'addrs' must be the last member */
struct flow_dissector_key_addrs addrs;
};
@@ -315,6 +316,9 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
}
u32 flow_hash_from_keys(struct flow_keys *keys);
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+ struct flow_dissector_key_icmp *key_icmp,
+ void *data, int thoff, int hlen);
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index dbf502c18656..0807df0bde02 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -178,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
mutex_unlock(&flow_dissector_mutex);
return 0;
}
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
- void *data, int hlen)
-{
- __be16 *u, _u;
-
- u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
- if (u)
- return *u;
-
- return 0;
-}
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -234,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static bool icmp_has_id(u8 type)
+{
+ switch (type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @toff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+ struct flow_dissector_key_icmp *key_icmp,
+ void *data, int thoff, int hlen)
+{
+ struct icmphdr *ih, _ih;
+
+ ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+ if (!ih)
+ return;
+
+ key_icmp->type = ih->type;
+ key_icmp->code = ih->code;
+
+ /* As we use 0 to signal that the Id field is not present,
+ * avoid confusion with packets without such field
+ */
+ if (icmp_has_id(ih->type))
+ key_icmp->id = ih->un.echo.id ? : 1;
+ else
+ key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ void *data, int thoff, int hlen)
+{
+ struct flow_dissector_key_icmp *key_icmp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+ return;
+
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+
+ skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container)
@@ -884,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
- struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct bpf_prog *attached = NULL;
@@ -1329,6 +1373,12 @@ ip_proto_again:
data, nhoff, hlen);
break;
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ __skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+
default:
break;
}
@@ -1342,14 +1392,6 @@ ip_proto_again:
data, hlen);
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP)) {
- key_icmp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP,
- target_container);
- key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
- }
-
/* Process result of IP proto processing */
switch (fdret) {
case FLOW_DISSECT_RET_PROTO_AGAIN:
@@ -1408,6 +1450,9 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
+ /* flow.addrs MUST be the last member in struct flow_keys because
+ * different L3 protocols have different address length
+ */
BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
sizeof(*flow) - sizeof(flow->addrs));
@@ -1455,6 +1500,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
}
EXPORT_SYMBOL(flow_get_u32_dst);
+/* Sort the source and destination IP (and the ports if the IP are the same),
+ * to have consistent hash within the two directions
+ */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
int addr_diff, i;