aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-07-07 21:14:29 -0700
committerDavid S. Miller <davem@davemloft.net>2014-07-07 21:14:29 -0700
commit6c035ea0a2c20c6c481241da562f1adb5b7b8631 (patch)
tree5100dd399922df55b29a1acf2b5a4dcc332c10b4
parentMerge branch 'systemport-next' (diff)
parentnet: Only do flow_dissector hash computation once per packet (diff)
downloadlinux-dev-6c035ea0a2c20c6c481241da562f1adb5b7b8631.tar.xz
linux-dev-6c035ea0a2c20c6c481241da562f1adb5b7b8631.zip
Merge branch 'net-hash-tx'
Tom Herbert says: ==================== net: Improvements and applications of packet flow hash in transmit path This patch series includes some patches which improve and make use of skb->hash in the transmit path. What is included: - Infrastructure to save a precomputed hash in the sock structure. For connected TCP and UDP sockets we only need to compute the flow hash once and not once for every packet. - Call skb_get_hash in get_xps_queue and __skb_tx_hash. This eliminates the awkward access to skb->sk->sk_hash in the lower transmit path. - Move UDP source port generation into a common function in udp.h This implementation is mostly based on vxlan_src_port. - Use non-zero IPv6 flow labels in flow_dissector as port information for flow hash calculation. - Implement automatic flow label generation on transmit (per RFC 6438). - Don't repeatedly try to compute an L4 hash in skb_get_hash if we've already tried to find one in software stack calculation. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt9
-rw-r--r--drivers/net/vxlan.c26
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/skbuff.h11
-rw-r--r--include/net/flow_keys.h1
-rw-r--r--include/net/ip.h14
-rw-r--r--include/net/ipv6.h35
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/net/sock.h11
-rw-r--r--include/net/udp.h29
-rw-r--r--include/net/vxlan.h2
-rw-r--r--include/uapi/linux/in6.h1
-rw-r--r--net/core/flow_dissector.c84
-rw-r--r--net/ipv4/datagram.c1
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/datagram.c1
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/ip6_tunnel.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c8
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/openvswitch/vport-vxlan.c5
26 files changed, 202 insertions, 76 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 10e216c6e05e..f35bfe43bf7a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN
FALSE: disabled
Default: TRUE
+auto_flowlabels - BOOLEAN
+ Automatically generate flow labels based based on a flow hash
+ of the packet. This allows intermediate devices, such as routers,
+ to idenfify packet flows for mechanisms like Equal Cost Multipath
+ Routing (see RFC 6438).
+ TRUE: enabled
+ FALSE: disabled
+ Default: false
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ade33ef82823..c2d360150804 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1570,25 +1570,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-/* Compute source port for outgoing packet
- * first choice to use L4 flow hash since it will spread
- * better and maybe available from hardware
- * secondary choice is to use jhash on the Ethernet header
- */
-__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
-{
- unsigned int range = (port_max - port_min) + 1;
- u32 hash;
-
- hash = skb_get_hash(skb);
- if (!hash)
- hash = jhash(skb->data, 2 * ETH_ALEN,
- (__force u32) skb->protocol);
-
- return htons((((u64) hash * range) >> 32) + port_min);
-}
-EXPORT_SYMBOL_GPL(vxlan_src_port);
-
static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
bool udp_csum)
{
@@ -1807,7 +1788,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
- src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);
+ src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->port_min,
+ vxlan->port_max, true);
if (dst->sa.sa_family == AF_INET) {
memset(&fl4, 0, sizeof(fl4));
@@ -2235,7 +2217,6 @@ static void vxlan_setup(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h;
- int low, high;
eth_hw_addr_random(dev);
ether_setup(dev);
@@ -2272,9 +2253,6 @@ static void vxlan_setup(struct net_device *dev)
vxlan->age_timer.function = vxlan_cleanup;
vxlan->age_timer.data = (unsigned long) vxlan;
- inet_get_local_port_range(dev_net(dev), &low, &high);
- vxlan->port_min = low;
- vxlan->port_max = high;
vxlan->dst_port = htons(vxlan_port);
vxlan->dev = dev;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5dc68c3ebcbd..ff560537dd61 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -199,7 +199,8 @@ struct ipv6_pinfo {
* 010: prefer public address
* 100: prefer care-of address
*/
- dontfrag:1;
+ dontfrag:1,
+ autoflowlabel:1;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 66f9a04ec270..8b43a28ee0bc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2486,7 +2486,7 @@ static inline int netif_set_xps_queue(struct net_device *dev,
* as a distribution range limit for the returned value.
*/
static inline u16 skb_tx_hash(const struct net_device *dev,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ec89301ada41..890fb3307dd6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -455,6 +455,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
* ports.
+ * @sw_hash: indicates hash was computed in software stack
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
@@ -562,6 +563,7 @@ struct sk_buff {
__u8 pfmemalloc:1;
__u8 ooo_okay:1;
__u8 l4_hash:1;
+ __u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
__u8 no_fcs:1;
@@ -575,7 +577,7 @@ struct sk_buff {
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
__u8 csum_complete_sw:1;
- /* 3/5 bit hole (depending on ndisc_nodetype presence) */
+ /* 2/4 bit hole (depending on ndisc_nodetype presence) */
kmemcheck_bitfield_end(flags2);
#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -830,13 +832,14 @@ static inline void
skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
{
skb->l4_hash = (type == PKT_HASH_TYPE_L4);
+ skb->sw_hash = 0;
skb->hash = hash;
}
void __skb_get_hash(struct sk_buff *skb);
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
- if (!skb->l4_hash)
+ if (!skb->l4_hash && !skb->sw_hash)
__skb_get_hash(skb);
return skb->hash;
@@ -850,6 +853,7 @@ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
static inline void skb_clear_hash(struct sk_buff *skb)
{
skb->hash = 0;
+ skb->sw_hash = 0;
skb->l4_hash = 0;
}
@@ -862,6 +866,7 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
{
to->hash = from->hash;
+ to->sw_hash = from->sw_hash;
to->l4_hash = from->l4_hash;
};
@@ -3005,7 +3010,7 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
return skb->queue_mapping != 0;
}
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues);
static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index fbefdca5e283..6667a054763a 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -29,4 +29,5 @@ struct flow_keys {
bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow);
__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto);
+u32 flow_hash_from_keys(struct flow_keys *keys);
#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 0e795df05ec9..2e8f055989c3 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -31,6 +31,7 @@
#include <net/route.h>
#include <net/snmp.h>
#include <net/flow.h>
+#include <net/flow_keys.h>
struct sock;
@@ -353,6 +354,19 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
skb->len, proto, 0);
}
+static inline void inet_set_txhash(struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct flow_keys keys;
+
+ keys.src = inet->inet_saddr;
+ keys.dst = inet->inet_daddr;
+ keys.port16[0] = inet->inet_sport;
+ keys.port16[1] = inet->inet_dport;
+
+ sk->sk_txhash = flow_hash_from_keys(&keys);
+}
+
/*
* Map a multicast IP onto multicast MAC for type ethernet.
*/
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 574337fe72dd..4308f2ada8b3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -19,6 +19,7 @@
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/flow.h>
+#include <net/flow_keys.h>
#include <net/snmp.h>
#define SIN6_LEN_RFC2133 24
@@ -684,6 +685,40 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
return hlimit;
}
+static inline void ip6_set_txhash(struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flow_keys keys;
+
+ keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
+ keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
+ keys.port16[0] = inet->inet_sport;
+ keys.port16[1] = inet->inet_dport;
+
+ sk->sk_txhash = flow_hash_from_keys(&keys);
+}
+
+static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+ __be32 flowlabel, bool autolabel)
+{
+ if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) {
+ __be32 hash;
+
+ hash = skb_get_hash(skb);
+
+ /* Since this is being sent on the wire obfuscate hash a bit
+ * to minimize possbility that any useful information to an
+ * attacker is leaked. Only lower 20 bits are relevant.
+ */
+ hash ^= hash >> 12;
+
+ flowlabel = hash & IPV6_FLOWLABEL_MASK;
+ }
+
+ return flowlabel;
+}
+
/*
* Header manipulation
*/
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 19d3446e59d2..eade27adecf3 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
int flowlabel_consistency;
+ int auto_flowlabels;
int icmpv6_time;
int anycast_src_echo_reply;
int fwmark_reflect;
diff --git a/include/net/sock.h b/include/net/sock.h
index 8d4c9473e7d7..cb84b2f1ad8f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -273,6 +273,7 @@ struct cg_proto;
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer
+ * @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
@@ -347,6 +348,7 @@ struct sock {
#ifdef CONFIG_RPS
__u32 sk_rxhash;
#endif
+ __u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_napi_id;
unsigned int sk_ll_usec;
@@ -1980,6 +1982,14 @@ static inline void sock_poll_wait(struct file *filp,
}
}
+static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
+{
+ if (sk->sk_txhash) {
+ skb->l4_hash = 1;
+ skb->hash = sk->sk_txhash;
+ }
+}
+
/*
* Queue a received datagram if it will fit. Stream and sequenced
* protocols can't normally use this as they need to fit buffers in
@@ -1994,6 +2004,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_wfree;
+ skb_set_hash_from_sk(skb, sk);
/*
* We used to take a refcount on sk, but following operation
* is enough to guarantee sk_free() wont free this sock until
diff --git a/include/net/udp.h b/include/net/udp.h
index 68a1fefe3dfe..70f941368ace 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -176,6 +176,35 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*)(const struct sock *, const struct sock *),
unsigned int hash2_nulladdr);
+static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
+ int min, int max, bool use_eth)
+{
+ u32 hash;
+
+ if (min >= max) {
+ /* Use default range */
+ inet_get_local_port_range(net, &min, &max);
+ }
+
+ hash = skb_get_hash(skb);
+ if (unlikely(!hash) && use_eth) {
+ /* Can't find a normal hash, caller has indicated an Ethernet
+ * packet so use that to compute a hash.
+ */
+ hash = jhash(skb->data, 2 * ETH_ALEN,
+ (__force u32) skb->protocol);
+ }
+
+ /* Since this is being sent on the wire obfuscate hash a bit
+ * to minimize possbility that any useful information to an
+ * attacker is leaked. Only upper 16 bits are relevant in the
+ * computation for 16 bit port value.
+ */
+ hash ^= hash << 16;
+
+ return htons((((u64) hash * (max - min)) >> 32) + min);
+}
+
/* net/ipv4/udp.c */
void udp_v4_early_demux(struct sk_buff *skb);
int udp_get_port(struct sock *sk, unsigned short snum,
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 12196ce661d9..d5f59f3fc35d 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -45,8 +45,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
-__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
-
/* IP header + UDP + VXLAN + Ethernet header */
#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
/* IPv6 header + UDP + VXLAN + Ethernet header */
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 0d8e0f0342dc..22b7a69619d8 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -233,6 +233,7 @@ struct in6_flowlabel_req {
#if 0 /* not yet */
#define IPV6_USE_MIN_MTU 63
#endif
+#define IPV6_AUTOFLOWLABEL 64
/*
* Netfilter (1)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c2b53c1b21d2..5f362c1d0332 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -80,6 +80,8 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
+ __be32 flow_label;
+
ipv6:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (!iph)
@@ -89,6 +91,21 @@ ipv6:
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
nhoff += sizeof(struct ipv6hdr);
+
+ flow_label = ip6_flowlabel(iph);
+ if (flow_label) {
+ /* Awesome, IPv6 packet has a flow label so we can
+ * use that to represent the ports without any
+ * further dissection.
+ */
+ flow->n_proto = proto;
+ flow->ip_proto = ip_proto;
+ flow->ports = flow_label;
+ flow->thoff = (u16)nhoff;
+
+ return true;
+ }
+
break;
}
case htons(ETH_P_8021AD):
@@ -196,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
return jhash_3words(a, b, c, hashrnd);
}
-static __always_inline u32 __flow_hash_1word(u32 a)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
- __flow_hash_secret_init();
- return jhash_1word(a, hashrnd);
+ u32 hash;
+
+ /* get a consistent hash (same value on both flow directions) */
+ if (((__force u32)keys->dst < (__force u32)keys->src) ||
+ (((__force u32)keys->dst == (__force u32)keys->src) &&
+ ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
+ swap(keys->dst, keys->src);
+ swap(keys->port16[0], keys->port16[1]);
+ }
+
+ hash = __flow_hash_3words((__force u32)keys->dst,
+ (__force u32)keys->src,
+ (__force u32)keys->ports);
+ if (!hash)
+ hash = 1;
+
+ return hash;
}
+u32 flow_hash_from_keys(struct flow_keys *keys)
+{
+ return __flow_hash_from_keys(keys);
+}
+EXPORT_SYMBOL(flow_hash_from_keys);
+
/*
* __skb_get_hash: calculate a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value
@@ -211,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a)
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
- u32 hash;
if (!skb_flow_dissect(skb, &keys))
return;
@@ -219,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb)
if (keys.ports)
skb->l4_hash = 1;
- /* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys.dst < (__force u32)keys.src) ||
- (((__force u32)keys.dst == (__force u32)keys.src) &&
- ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
- swap(keys.dst, keys.src);
- swap(keys.port16[0], keys.port16[1]);
- }
-
- hash = __flow_hash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports);
- if (!hash)
- hash = 1;
+ skb->sw_hash = 1;
- skb->hash = hash;
+ skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -241,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash);
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
@@ -261,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
qcount = dev->tc_to_txq[tc].count;
}
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol;
- hash = __flow_hash_1word(hash);
-
- return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+ return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
@@ -339,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
- else {
- u32 hash;
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol ^
- skb->hash;
- hash = __flow_hash_1word(hash);
+ else
queue_index = map->queues[
- ((u64)hash * map->len) >> 32];
- }
+ ((u64)skb_get_hash(skb) * map->len) >> 32];
+
if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1;
}
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index a3095fdefbed..90c0e8386116 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
+ inet_set_txhash(sk);
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 52d0f6a1ec2c..1edc739b9da5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
+ inet_set_txhash(sk);
+
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -1334,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ inet_set_txhash(newsk);
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f8f2a944a1ce..bcee13c4627c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
skb->destructor = tcp_wfree;
+ skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a426cd7099bb..2daa3a133e49 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
+ net->ipv6.sysctl.auto_flowlabels = 0;
atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c3bf2d2e519e..2753319524f1 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,6 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
+ ip6_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3873181ed856..365b2b6f3942 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
* Push down and install the IP header.
*/
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1);
- ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+ ip6_flow_hdr(ipv6h, 0,
+ ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cb9df0eb4023..fa83bdd4c3dd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, np->cork.tclass,
+ ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index afa082458360..51a1eb185ea7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cc34f65179e4..b50b9e54cf53 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -834,6 +834,10 @@ pref_skip_coa:
np->dontfrag = valbool;
retv = 0;
break;
+ case IPV6_AUTOFLOWLABEL:
+ np->autoflowlabel = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->dontfrag;
break;
+ case IPV6_AUTOFLOWLABEL:
+ val = np->autoflowlabel;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 058f3eca2e53..5bf7b61f8ae8 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -39,6 +39,13 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "auto_flowlabels",
+ .data = &init_net.ipv6.sysctl.auto_flowlabels,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
@@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
+ ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a97c95585da8..22055b098428 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
+ ip6_set_txhash(sk);
+
/*
* TCP over IPv4
*/
@@ -1132,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
+ ip6_set_txhash(newsk);
+
/* Now IPv6 options...
First: no IPv4 options.
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 0edbd95c60e7..d8b7e247bebf 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -143,8 +143,6 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
- int port_min;
- int port_max;
__be16 df;
int err;
@@ -172,8 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->ignore_df = 1;
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = vxlan_src_port(port_min, port_max, skb);
+ src_port = udp_flow_src_port(net, skb, 0, 0, true);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,