aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h15
-rw-r--r--include/net/addrconf.h2
-rw-r--r--include/net/caif/cfpkt.h27
-rw-r--r--include/net/cfg80211.h17
-rw-r--r--include/net/devlink.h115
-rw-r--r--include/net/dn_route.h1
-rw-r--r--include/net/dsa.h61
-rw-r--r--include/net/dst.h39
-rw-r--r--include/net/erspan.h240
-rw-r--r--include/net/gen_stats.h3
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/inet_hashtables.h29
-rw-r--r--include/net/inet_sock.h25
-rw-r--r--include/net/inet_timewait_sock.h4
-rw-r--r--include/net/ip.h9
-rw-r--r--include/net/ip6_fib.h20
-rw-r--r--include/net/ip6_route.h11
-rw-r--r--include/net/ip6_tunnel.h4
-rw-r--r--include/net/ip_tunnels.h5
-rw-r--r--include/net/ip_vs.h3
-rw-r--r--include/net/ipv6.h19
-rw-r--r--include/net/mac80211.h10
-rw-r--r--include/net/net_namespace.h10
-rw-r--r--include/net/netfilter/ipv4/nf_conntrack_ipv4.h12
-rw-r--r--include/net/netfilter/ipv6/nf_conntrack_ipv6.h12
-rw-r--r--include/net/netfilter/nf_conntrack_count.h17
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h19
-rw-r--r--include/net/netfilter/nf_flow_table.h122
-rw-r--r--include/net/netfilter/nf_queue.h2
-rw-r--r--include/net/netfilter/nf_tables.h129
-rw-r--r--include/net/netfilter/nf_tables_ipv4.h27
-rw-r--r--include/net/netfilter/nf_tables_ipv6.h29
-rw-r--r--include/net/netns/can.h4
-rw-r--r--include/net/netns/core.h5
-rw-r--r--include/net/netns/netfilter.h12
-rw-r--r--include/net/netns/nftables.h8
-rw-r--r--include/net/netns/sctp.h5
-rw-r--r--include/net/pkt_cls.h113
-rw-r--r--include/net/pkt_sched.h17
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sch_generic.h148
-rw-r--r--include/net/sctp/constants.h9
-rw-r--r--include/net/sctp/sctp.h6
-rw-r--r--include/net/sctp/sm.h18
-rw-r--r--include/net/sctp/stream_interleave.h61
-rw-r--r--include/net/sctp/structs.h68
-rw-r--r--include/net/sctp/ulpevent.h23
-rw-r--r--include/net/sctp/ulpqueue.h10
-rw-r--r--include/net/sock.h57
-rw-r--r--include/net/tc_act/tc_csum.h16
-rw-r--r--include/net/tc_act/tc_mirred.h6
-rw-r--r--include/net/tcp.h52
-rw-r--r--include/net/vxlan.h2
-rw-r--r--include/net/wext.h4
-rw-r--r--include/net/xdp.h48
-rw-r--r--include/net/xfrm.h79
57 files changed, 1455 insertions, 362 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index fd08df74c466..6ed9692f20bd 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -86,7 +86,7 @@ struct tc_action_ops {
int (*act)(struct sk_buff *, const struct tc_action *,
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
- void (*cleanup)(struct tc_action *, int bind);
+ void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *, struct tc_action **, u32);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
@@ -120,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn,
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
struct tcf_idrinfo *idrinfo);
-static inline void tc_action_net_exit(struct tc_action_net *tn)
+static inline void tc_action_net_exit(struct list_head *net_list,
+ unsigned int id)
{
+ struct net *net;
+
rtnl_lock();
- tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct tc_action_net *tn = net_generic(net, id);
+
+ tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ kfree(tn->idrinfo);
+ }
rtnl_unlock();
- kfree(tn->idrinfo);
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index b623b65a79d1..c4185a7b0e90 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -180,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
*/
int ipv6_addr_label_init(void);
void ipv6_addr_label_cleanup(void);
-void ipv6_addr_label_rtnl_register(void);
+int ipv6_addr_label_rtnl_register(void);
u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
int type, int ifindex);
diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
index fe328c52c46b..801489bb14c3 100644
--- a/include/net/caif/cfpkt.h
+++ b/include/net/caif/cfpkt.h
@@ -32,6 +32,33 @@ void cfpkt_destroy(struct cfpkt *pkt);
*/
int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len);
+static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt)
+{
+ u8 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 1);
+
+ return tmp;
+}
+
+static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt)
+{
+ __le16 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 2);
+
+ return le16_to_cpu(tmp);
+}
+
+static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt)
+{
+ __le32 tmp;
+
+ cfpkt_extr_head(pkt, &tmp, 4);
+
+ return le32_to_cpu(tmp);
+}
+
/*
* Peek header from packet.
* Reads data from packet without changing packet.
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fb94a8bd8ab5..81174f9b8d14 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1775,6 +1775,8 @@ enum cfg80211_signal_type {
* by %parent_bssid.
* @parent_bssid: the BSS according to which %parent_tsf is set. This is set to
* the BSS that requested the scan in which the beacon/probe was received.
+ * @chains: bitmask for filled values in @chain_signal.
+ * @chain_signal: per-chain signal strength of last received BSS in dBm.
*/
struct cfg80211_inform_bss {
struct ieee80211_channel *chan;
@@ -1783,6 +1785,8 @@ struct cfg80211_inform_bss {
u64 boottime_ns;
u64 parent_tsf;
u8 parent_bssid[ETH_ALEN] __aligned(2);
+ u8 chains;
+ s8 chain_signal[IEEE80211_MAX_CHAINS];
};
/**
@@ -1826,6 +1830,8 @@ struct cfg80211_bss_ies {
* that holds the beacon data. @beacon_ies is still valid, of course, and
* points to the same data as hidden_beacon_bss->beacon_ies in that case.
* @signal: signal strength value (type depends on the wiphy's signal_type)
+ * @chains: bitmask for filled values in @chain_signal.
+ * @chain_signal: per-chain signal strength of last received BSS in dBm.
* @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
*/
struct cfg80211_bss {
@@ -1844,6 +1850,8 @@ struct cfg80211_bss {
u16 capability;
u8 bssid[ETH_ALEN];
+ u8 chains;
+ s8 chain_signal[IEEE80211_MAX_CHAINS];
u8 priv[0] __aligned(sizeof(void *));
};
@@ -2023,6 +2031,9 @@ struct cfg80211_disassoc_request {
* @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask
* will be used in ht_capa. Un-supported values will be ignored.
* @ht_capa_mask: The bits of ht_capa which are to be used.
+ * @wep_keys: static WEP keys, if not NULL points to an array of
+ * CFG80211_MAX_WEP_KEYS WEP keys
+ * @wep_tx_key: key index (0..3) of the default TX static WEP key
*/
struct cfg80211_ibss_params {
const u8 *ssid;
@@ -2039,6 +2050,8 @@ struct cfg80211_ibss_params {
int mcast_rate[NUM_NL80211_BANDS];
struct ieee80211_ht_cap ht_capa;
struct ieee80211_ht_cap ht_capa_mask;
+ struct key_params *wep_keys;
+ int wep_tx_key;
};
/**
@@ -5577,7 +5590,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
* cfg80211_rx_mgmt - notification of received, unprocessed management frame
* @wdev: wireless device receiving the frame
* @freq: Frequency on which the frame was received in MHz
- * @sig_dbm: signal strength in mBm, or 0 if unknown
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
* @buf: Management frame (header + body)
* @len: length of the frame data
* @flags: flags, as defined in enum nl80211_rxmgmt_flags
@@ -5756,7 +5769,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
* @frame: the frame
* @len: length of the frame
* @freq: frequency the frame was received on
- * @sig_dbm: signal strength in mBm, or 0 if unknown
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
*
* Use this function to report to userspace when a beacon was
* received. It is not useful to call this when there is no
diff --git a/include/net/devlink.h b/include/net/devlink.h
index b9654e133599..6545b03e97f7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -26,10 +26,12 @@ struct devlink {
struct list_head port_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
+ struct list_head resource_list;
struct devlink_dpipe_headers *dpipe_headers;
const struct devlink_ops *ops;
struct device *dev;
possible_net_t _net;
+ struct mutex lock;
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -181,6 +183,9 @@ struct devlink_dpipe_table_ops;
* @counters_enabled: indicates if counters are active
* @counter_control_extern: indicates if counter control is in dpipe or
* external tool
+ * @resource_valid: Indicate that the resource id is valid
+ * @resource_id: relative resource this table is related to
+ * @resource_units: number of resource's unit consumed per table's entry
* @table_ops: table operations
* @rcu: rcu
*/
@@ -190,6 +195,9 @@ struct devlink_dpipe_table {
const char *name;
bool counters_enabled;
bool counter_control_extern;
+ bool resource_valid;
+ u64 resource_id;
+ u64 resource_units;
struct devlink_dpipe_table_ops *table_ops;
struct rcu_head rcu;
};
@@ -223,7 +231,63 @@ struct devlink_dpipe_headers {
unsigned int headers_count;
};
+/**
+ * struct devlink_resource_ops - resource ops
+ * @occ_get: get the occupied size
+ * @size_validate: validate the size of the resource before update, reload
+ * is needed for changes to take place
+ */
+struct devlink_resource_ops {
+ u64 (*occ_get)(struct devlink *devlink);
+ int (*size_validate)(struct devlink *devlink, u64 size,
+ struct netlink_ext_ack *extack);
+};
+
+/**
+ * struct devlink_resource_size_params - resource's size parameters
+ * @size_min: minimum size which can be set
+ * @size_max: maximum size which can be set
+ * @size_granularity: size granularity
+ * @size_unit: resource's basic unit
+ */
+struct devlink_resource_size_params {
+ u64 size_min;
+ u64 size_max;
+ u64 size_granularity;
+ enum devlink_resource_unit unit;
+};
+
+/**
+ * struct devlink_resource - devlink resource
+ * @name: name of the resource
+ * @id: id, per devlink instance
+ * @size: size of the resource
+ * @size_new: updated size of the resource, reload is needed
+ * @size_valid: valid in case the total size of the resource is valid
+ * including its children
+ * @parent: parent resource
+ * @size_params: size parameters
+ * @list: parent list
+ * @resource_list: list of child resources
+ * @resource_ops: resource ops
+ */
+struct devlink_resource {
+ const char *name;
+ u64 id;
+ u64 size;
+ u64 size_new;
+ bool size_valid;
+ struct devlink_resource *parent;
+ struct devlink_resource_size_params *size_params;
+ struct list_head list;
+ struct list_head resource_list;
+ const struct devlink_resource_ops *resource_ops;
+};
+
+#define DEVLINK_RESOURCE_ID_PARENT_TOP 0
+
struct devlink_ops {
+ int (*reload)(struct devlink *devlink);
int (*port_type_set)(struct devlink_port *devlink_port,
enum devlink_port_type port_type);
int (*port_split)(struct devlink *devlink, unsigned int port_index,
@@ -332,6 +396,23 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops);
+void devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource);
+int devlink_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size);
+int devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units);
+
#else
static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -468,6 +549,40 @@ devlink_dpipe_match_put(struct sk_buff *skb,
return 0;
}
+static inline int
+devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops)
+{
+ return 0;
+}
+
+static inline void
+devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+}
+
+static inline int
+devlink_resource_size_get(struct devlink *devlink, u64 resource_id,
+ u64 *p_resource_size)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* _NET_DEVLINK_H_ */
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 55df9939bca2..342d2503cba5 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
*/
struct dn_route {
struct dst_entry dst;
+ struct dn_route __rcu *dn_next;
struct neighbour *n;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2a05738570d8..6cb602dd970c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,31 +296,39 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds)
return mask;
}
-static inline u8 dsa_upstream_port(struct dsa_switch *ds)
+/* Return the local port used to reach an arbitrary switch port */
+static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device,
+ int port)
{
- struct dsa_switch_tree *dst = ds->dst;
-
- /*
- * If this is the root switch (i.e. the switch that connects
- * to the CPU), return the cpu port number on this switch.
- * Else return the (DSA) port number that connects to the
- * switch that is one hop closer to the cpu.
- */
- if (dst->cpu_dp->ds == ds)
- return dst->cpu_dp->index;
+ if (device == ds->index)
+ return port;
else
- return ds->rtable[dst->cpu_dp->ds->index];
+ return ds->rtable[device];
+}
+
+/* Return the local port used to reach the dedicated CPU port */
+static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
+{
+ const struct dsa_port *dp = dsa_to_port(ds, port);
+ const struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ if (!cpu_dp)
+ return port;
+
+ return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
}
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/*
* Legacy probing.
*/
const char *(*probe)(struct device *dsa_dev,
struct device *host_dev, int sw_addr,
void **priv);
+#endif
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
int port);
@@ -412,12 +420,10 @@ struct dsa_switch_ops {
*/
int (*port_vlan_filtering)(struct dsa_switch *ds, int port,
bool vlan_filtering);
- int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
- void (*port_vlan_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+ void (*port_vlan_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
/*
@@ -433,12 +439,10 @@ struct dsa_switch_ops {
/*
* Multicast database
*/
- int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
- void (*port_mdb_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
+ int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
+ void (*port_mdb_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
/*
@@ -472,11 +476,20 @@ struct dsa_switch_driver {
const struct dsa_switch_ops *ops;
};
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/* Legacy driver registration */
void register_switch_driver(struct dsa_switch_driver *type);
void unregister_switch_driver(struct dsa_switch_driver *type);
struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
+#else
+static inline void register_switch_driver(struct dsa_switch_driver *type) { }
+static inline void unregister_switch_driver(struct dsa_switch_driver *type) { }
+static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
+{
+ return NULL;
+}
+#endif
struct net_device *dsa_dev_to_net_device(struct device *dev);
/* Keep inline for faster access in hot path */
diff --git a/include/net/dst.h b/include/net/dst.h
index d49d607dd2b3..c63d2c37f6e9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -34,13 +34,9 @@ struct sk_buff;
struct dst_entry {
struct net_device *dev;
- struct rcu_head rcu_head;
- struct dst_entry *child;
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
- struct dst_entry *path;
- struct dst_entry *from;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
@@ -59,8 +55,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080
- short error;
-
/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
* dst layer to indicate that the entry has been forcefully
@@ -76,35 +70,24 @@ struct dst_entry {
#define DST_OBSOLETE_KILL -2
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
- unsigned short __pad3;
-#ifdef CONFIG_IP_ROUTE_CLASSID
- __u32 tclassid;
-#else
- __u32 __pad2;
-#endif
-
-#ifdef CONFIG_64BIT
- /*
- * Align __refcnt to a 64 bytes alignment
- * (L1_CACHE_SIZE would be too much)
- */
- long __pad_to_align_refcnt[2];
-#endif
/*
* __refcnt wants to be on a different cache line from
* input/output/ops or performance tanks badly
*/
- atomic_t __refcnt; /* client references */
+#ifdef CONFIG_64BIT
+ atomic_t __refcnt; /* 64-bit offset 64 */
+#endif
int __use;
unsigned long lastuse;
struct lwtunnel_state *lwtstate;
- union {
- struct dst_entry *next;
- struct rtable __rcu *rt_next;
- struct rt6_info __rcu *rt6_next;
- struct dn_route __rcu *dn_next;
- };
+ struct rcu_head rcu_head;
+ short error;
+ short __pad;
+ __u32 tclassid;
+#ifndef CONFIG_64BIT
+ atomic_t __refcnt; /* 32-bit offset 64 */
+#endif
};
struct dst_metrics {
@@ -250,7 +233,7 @@ static inline void dst_hold(struct dst_entry *dst)
{
/*
* If your kernel compilation stops here, please check
- * __pad_to_align_refcnt declaration in struct dst_entry
+ * the placement of __refcnt in struct dst_entry
*/
BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
diff --git a/include/net/erspan.h b/include/net/erspan.h
index ca94fc86865e..5daa4866412b 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -15,7 +15,7 @@
* s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
* other fields are set to zero, so only a sequence number follows.
*
- * ERSPAN Type II header (8 octets [42:49])
+ * ERSPAN Version 1 (Type II) header (8 octets [42:49])
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -24,11 +24,31 @@
* | Reserved | Index |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
+ *
+ * ERSPAN Version 2 (Type III) header (12 octets [42:49])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver | VLAN | COS |BSO|T| Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Timestamp |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SGT |P| FT | Hw ID |D|Gra|O|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Platform Specific SubHeader (8 octets, optional)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platf ID | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
* GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
*/
-#define ERSPAN_VERSION 0x1
+#include <uapi/linux/erspan.h>
+#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
#define VER_MASK 0xf000
#define VLAN_MASK 0x0fff
#define COS_MASK 0xe000
@@ -37,6 +57,19 @@
#define ID_MASK 0x03ff
#define INDEX_MASK 0xfffff
+#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/
+#define BSO_MASK EN_MASK
+#define SGT_MASK 0xffff0000
+#define P_MASK 0x8000
+#define FT_MASK 0x7c00
+#define HWID_MASK 0x03f0
+#define DIR_MASK 0x0008
+#define GRA_MASK 0x0006
+#define O_MASK 0x0001
+
+#define HWID_OFFSET 4
+#define DIR_OFFSET 3
+
enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
@@ -44,18 +77,199 @@ enum erspan_encap_type {
ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */
};
-struct erspan_metadata {
- __be32 index; /* type II */
-};
+#define ERSPAN_V1_MDSIZE 4
+#define ERSPAN_V2_MDSIZE 8
-struct erspanhdr {
- __be16 ver_vlan;
-#define VER_OFFSET 12
- __be16 session_id;
-#define COS_OFFSET 13
-#define EN_OFFSET 11
-#define T_OFFSET 10
- struct erspan_metadata md;
+struct erspan_base_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 vlan_upper:4,
+ ver:4;
+ __u8 vlan:8;
+ __u8 session_id_upper:2,
+ t:1,
+ en:2,
+ cos:3;
+ __u8 session_id:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 ver: 4,
+ vlan_upper:4;
+ __u8 vlan:8;
+ __u8 cos:3,
+ en:2,
+ t:1,
+ session_id_upper:2;
+ __u8 session_id:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
};
+static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id)
+{
+ ershdr->session_id = id & 0xff;
+ ershdr->session_id_upper = (id >> 8) & 0x3;
+}
+
+static inline u16 get_session_id(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->session_id_upper << 8) + ershdr->session_id;
+}
+
+static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan)
+{
+ ershdr->vlan = vlan & 0xff;
+ ershdr->vlan_upper = (vlan >> 8) & 0xf;
+}
+
+static inline u16 get_vlan(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->vlan_upper << 8) + ershdr->vlan;
+}
+
+static inline void set_hwid(struct erspan_md2 *md2, u8 hwid)
+{
+ md2->hwid = hwid & 0xf;
+ md2->hwid_upper = (hwid >> 4) & 0x3;
+}
+
+static inline u8 get_hwid(const struct erspan_md2 *md2)
+{
+ return (md2->hwid_upper << 4) + md2->hwid;
+}
+
+static inline int erspan_hdr_len(int version)
+{
+ return sizeof(struct erspan_base_hdr) +
+ (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
+}
+
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static inline void erspan_build_header(struct sk_buff *skb,
+ u32 id, u32 index,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ enum erspan_encap_type enc_type;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *ersmd;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u8 tos;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If mirrored packet has vlan tag, extract tci and
+ * perserve vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver = ERSPAN_VERSION;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = enc_type;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
+
+ /* Build metadata */
+ ersmd = (struct erspan_metadata *)(ershdr + 1);
+ ersmd->u.index = htonl(index & INDEX_MASK);
+}
+
+/* ERSPAN GRA: timestamp granularity
+ * 00b --> granularity = 100 microseconds
+ * 01b --> granularity = 100 nanoseconds
+ * 10b --> granularity = IEEE 1588
+ * Here we only support 100 microseconds.
+ */
+static inline __be32 erspan_get_timestamp(void)
+{
+ u64 h_usecs;
+ ktime_t kt;
+
+ kt = ktime_get_real();
+ h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC);
+
+ /* ERSPAN base header only has 32-bit,
+ * so it wraps around 4 days.
+ */
+ return htonl((u32)h_usecs);
+}
+
+static inline void erspan_build_header_v2(struct sk_buff *skb,
+ u32 id, u8 direction, u16 hwid,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *md;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u8 gra = 0; /* 100 usec */
+ u8 bso = 0; /* Bad/Short/Oversized */
+ u8 sgt = 0;
+ u8 tos;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ /* Unlike v1, v2 does not have En field,
+ * so only extract vlan tci field.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver = ERSPAN_VERSION2;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = bso;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
+
+ /* Build metadata */
+ md = (struct erspan_metadata *)(ershdr + 1);
+ md->u.md2.timestamp = erspan_get_timestamp();
+ md->u.md2.sgt = htons(sgt);
+ md->u.md2.p = 1;
+ md->u.md2.ft = 0;
+ md->u.md2.dir = direction;
+ md->u.md2.gra = gra;
+ md->u.md2.o = 0;
+ set_hwid(&md->u.md2, hwid);
+}
+
#endif
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 304f7aa9cc01..0304ba2ae353 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d,
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q, __u32 qlen);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 0358745ea059..8e1bf9ae4a5e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
+ * @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -101,6 +102,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void *icsk_ulp_data;
+ struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
icsk_ca_setsockopt:1,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 2dbbbff5e1e3..9141e95529e7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -111,6 +111,7 @@ struct inet_bind_hashbucket {
*/
struct inet_listen_hashbucket {
spinlock_t lock;
+ unsigned int count;
struct hlist_head head;
};
@@ -132,12 +133,13 @@ struct inet_hashinfo {
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
*/
+ struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
-
unsigned int bhash_size;
- /* 4 bytes hole on 64 bit */
- struct kmem_cache *bind_bucket_cachep;
+ /* The 2nd listener table hashed by local port and address */
+ unsigned int lhash2_mask;
+ struct inet_listen_hashbucket *lhash2;
/* All the above members are written once at bootup and
* never written again _or_ are predominantly read-access.
@@ -145,14 +147,25 @@ struct inet_hashinfo {
* Now align to a new cache line as all the following members
* might be often dirty.
*/
- /* All sockets in TCP_LISTEN state will be in here. This is the only
- * table where wildcard'd TCP sockets can exist. Hash function here
- * is just local port number.
+ /* All sockets in TCP_LISTEN state will be in listening_hash.
+ * This is the only table where wildcard'd TCP sockets can
+ * exist. listening_hash is only hashed by local port number.
+ * If lhash2 is initialized, the same socket will also be hashed
+ * to lhash2 by port and address.
*/
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp;
};
+#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
+ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
+
+static inline struct inet_listen_hashbucket *
+inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
+{
+ return &h->lhash2[hash & h->lhash2_mask];
+}
+
static inline struct inet_ehash_bucket *inet_ehash_bucket(
struct inet_hashinfo *hashinfo,
unsigned int hash)
@@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child);
void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h);
+void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ unsigned long numentries, int scale,
+ unsigned long low_limit,
+ unsigned long high_limit);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 39efb968b7a4..0a671c32d6b9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -291,6 +291,31 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
int inet_sk_rebuild_header(struct sock *sk);
+/**
+ * inet_sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with inet_sk_state_store(). Used in places we don't hold socket lock:
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int inet_sk_state_load(const struct sock *sk)
+{
+ /* state change might impact lockless readers. */
+ return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * inet_sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with inet_sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+void inet_sk_state_store(struct sock *sk, int newstate);
+
+void inet_sk_set_state(struct sock *sk, int state);
+
static inline unsigned int __inet_ehashfn(const __be32 laddr,
const __u16 lport,
const __be32 faddr,
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 1356fa6a7566..899495589a7e 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -93,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
struct inet_timewait_death_row *dr,
const int state);
-void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
- struct inet_hashinfo *hashinfo);
+void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+ struct inet_hashinfo *hashinfo);
void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
bool rearm);
diff --git a/include/net/ip.h b/include/net/ip.h
index af8addbaa3c1..746abff9ce51 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -26,12 +26,14 @@
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/skbuff.h>
+#include <linux/jhash.h>
#include <net/inet_sock.h>
#include <net/route.h>
#include <net/snmp.h>
#include <net/flow.h>
#include <net/flow_dissector.h>
+#include <net/netns/hash.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
@@ -522,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip)
return (__force unsigned int) ip;
}
+static inline u32 ipv4_portaddr_hash(const struct net *net,
+ __be32 saddr,
+ unsigned int port)
+{
+ return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
+}
+
bool ip_call_ra_chain(struct sk_buff *skb);
/*
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 10c913816032..34ec321d6a03 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -129,6 +129,8 @@ struct rt6_exception {
struct rt6_info {
struct dst_entry dst;
+ struct rt6_info __rcu *rt6_next;
+ struct rt6_info *from;
/*
* Tail elements of dst_entry (__refcnt etc.)
@@ -147,6 +149,7 @@ struct rt6_info {
*/
struct list_head rt6i_siblings;
unsigned int rt6i_nsiblings;
+ atomic_t rt6i_nh_upper_bound;
atomic_t rt6i_ref;
@@ -168,19 +171,21 @@ struct rt6_info {
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
+ int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
u8 exception_bucket_flushed:1,
- unused:7;
+ should_flush:1,
+ unused:6;
};
#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
- rt = rcu_dereference(rt->dst.rt6_next))
+ rt = rcu_dereference(rt->rt6_next))
#define for_each_fib6_walker_rt(w) \
for (rt = (w)->leaf; rt; \
- rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
+ rt = rcu_dereference_protected(rt->rt6_next, 1))
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
@@ -203,11 +208,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
struct rt6_info *rt;
- for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES);
- rt = (struct rt6_info *)rt->dst.from);
+ for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
if (rt && rt != rt0)
rt0->dst.expires = rt->dst.expires;
-
dst_set_expires(&rt0->dst, timeout);
rt0->rt6i_flags |= RTF_EXPIRES;
}
@@ -242,8 +245,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
u32 cookie = 0;
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
- rt = (struct rt6_info *)(rt->dst.from);
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
+ rt = rt->from;
rt6_get_cookie_safe(rt, &cookie);
@@ -404,6 +407,7 @@ unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
void fib6_update_sernum(struct rt6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 18e442ea93d8..27d23a65f3cd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ RTF_GATEWAY;
+}
+
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
@@ -165,10 +171,13 @@ struct rt6_rtnl_dump_arg {
};
int rt6_dump_route(struct rt6_info *rt, void *p_arg);
-void rt6_ifdown(struct net *net, struct net_device *dev);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
+void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
+void rt6_disable_ip(struct net_device *dev, unsigned long event);
+void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
+void rt6_multipath_rebalance(struct rt6_info *rt);
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
{
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index d66f70f63734..236e40ba06bf 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -36,6 +36,10 @@ struct __ip6_tnl_parm {
__be32 o_key;
__u32 fwmark;
+ __u32 index; /* ERSPAN type II index */
+ __u8 erspan_ver; /* ERSPAN version */
+ __u8 dir; /* direction */
+ __u16 hwid; /* hwid */
};
/* IPv6 tunnel */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 24628f6b09bf..1f16773cfd76 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,8 +116,11 @@ struct ip_tunnel {
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
- /* This field used only by ERSPAN */
+ /* These four fields used only by ERSPAN */
u32 index; /* ERSPAN type II index */
+ u8 erspan_ver; /* ERSPAN version */
+ u8 dir; /* ERSPAN direction */
+ u16 hwid; /* ERSPAN hardware ID */
struct dst_cache dst_cache;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ff68cf288f9b..eb0bec043c96 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -69,8 +69,7 @@ struct ip_vs_iphdr {
};
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
- int len, void *buffer,
- const struct ip_vs_iphdr *ipvsh)
+ int len, void *buffer)
{
return skb_header_pointer(skb, offset, len, buffer);
}
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 221238254eb7..8606c9113d3f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -22,6 +22,7 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
#include <net/snmp.h>
+#include <net/netns/hash.h>
#define SIN6_LEN_RFC2133 24
@@ -674,6 +675,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
cpu_to_be32(0x0000ffff))) == 0UL;
}
+static inline u32 ipv6_portaddr_hash(const struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{
+ unsigned int hash, mix = net_hash_mix(net);
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix);
+ else if (ipv6_addr_v4mapped(addr6))
+ hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+ else
+ hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+ return hash ^ port;
+}
+
/*
* Check for a RFC 4843 ORCHID address
* (Overlay Routable Cryptographic Hash Identifiers)
@@ -953,6 +970,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index eec143cca1c0..906e90223066 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1552,6 +1552,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
* @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the
* driver for a key to indicate that sufficient tailroom must always
* be reserved for ICV or MIC, even when HW encryption is enabled.
+ * @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for
+ * a TKIP key if it only requires MIC space. Do not set together with
+ * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key.
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -1562,6 +1565,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5),
IEEE80211_KEY_FLAG_RX_MGMT = BIT(6),
IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7),
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
};
/**
@@ -1593,8 +1597,8 @@ struct ieee80211_key_conf {
u8 icv_len;
u8 iv_len;
u8 hw_key_idx;
- u8 flags;
s8 keyidx;
+ u16 flags;
u8 keylen;
u8 key[0];
};
@@ -2056,6 +2060,9 @@ struct ieee80211_txq {
* The stack will not do fragmentation.
* The callback for @set_frag_threshold should be set as well.
*
+ * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
+ * TDLS links.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2098,6 +2105,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_TX_FRAG_LIST,
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
+ IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 049008493faf..f306b2aa15a4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -51,7 +51,7 @@ struct net {
refcount_t passive; /* To decided when the network
* namespace should be freed.
*/
- atomic_t count; /* To decided when the network
+ refcount_t count; /* To decided when the network
* namespace should be shut down.
*/
spinlock_t rules_mod_lock;
@@ -195,7 +195,7 @@ void __put_net(struct net *net);
static inline struct net *get_net(struct net *net)
{
- atomic_inc(&net->count);
+ refcount_inc(&net->count);
return net;
}
@@ -206,14 +206,14 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!atomic_inc_not_zero(&net->count))
+ if (!refcount_inc_not_zero(&net->count))
net = NULL;
return net;
}
static inline void put_net(struct net *net)
{
- if (atomic_dec_and_test(&net->count))
+ if (refcount_dec_and_test(&net->count))
__put_net(net);
}
@@ -225,7 +225,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
- return atomic_read(&net->count) != 0;
+ return refcount_read(&net->count) != 0;
}
void net_drop_ns(void *);
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 4ed1040bbe4a..73f825732326 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -13,17 +13,17 @@
const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
#endif
int nf_conntrack_ipv4_compat_init(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 9cd55be95853..effa8dfba68c 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,17 +4,17 @@
extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
#endif
#include <linux/sysctl.h>
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
new file mode 100644
index 000000000000..adf8db44cf86
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -0,0 +1,17 @@
+#ifndef _NF_CONNTRACK_COUNT_H
+#define _NF_CONNTRACK_COUNT_H
+
+struct nf_conncount_data;
+
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+ unsigned int keylen);
+void nf_conncount_destroy(struct net *net, unsigned int family,
+ struct nf_conncount_data *data);
+
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ unsigned int family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone);
+#endif
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7ef56c13698a..a7220eef9aee 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,6 +27,9 @@ struct nf_conntrack_l4proto {
/* Resolve clashes on insertion races. */
bool allow_clash;
+ /* protoinfo nlattr size, closes a hole */
+ u16 nlattr_size;
+
/* Try to fill in the third arg: dataoff is offset past network protocol
hdr. Return true if possible. */
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
@@ -66,8 +69,6 @@ struct nf_conntrack_l4proto {
/* convert protoinfo to nfnetink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct);
- /* Calculate protoinfo nlattr size */
- int (*nlattr_size)(void);
/* convert nfnetlink attributes to protoinfo */
int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);
@@ -80,8 +81,6 @@ struct nf_conntrack_l4proto {
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
- size_t nla_size;
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct {
int (*nlattr_to_obj)(struct nlattr *tb[],
@@ -109,7 +108,7 @@ struct nf_conntrack_l4proto {
};
/* Existing built-in generic protocol */
-extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO 256
@@ -126,18 +125,18 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *const proto[],
+ const struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *const proto[],
+ const struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
/* Protocol global registration. */
-int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
+int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
+void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
unsigned int num_proto);
/* Generic netlink helpers */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
new file mode 100644
index 000000000000..b22b22082733
--- /dev/null
+++ b/include/net/netfilter/nf_flow_table.h
@@ -0,0 +1,122 @@
+#ifndef _NF_FLOW_TABLE_H
+#define _NF_FLOW_TABLE_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/rcupdate.h>
+#include <net/dst.h>
+
+struct nf_flowtable;
+
+struct nf_flowtable_type {
+ struct list_head list;
+ int family;
+ void (*gc)(struct work_struct *work);
+ const struct rhashtable_params *params;
+ nf_hookfn *hook;
+ struct module *owner;
+};
+
+struct nf_flowtable {
+ struct rhashtable rhashtable;
+ const struct nf_flowtable_type *type;
+ struct delayed_work gc_work;
+};
+
+enum flow_offload_tuple_dir {
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY,
+ __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
+};
+#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
+
+struct flow_offload_tuple {
+ union {
+ struct in_addr src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ struct in_addr dst_v4;
+ struct in6_addr dst_v6;
+ };
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+
+ int iifidx;
+
+ u8 l3proto;
+ u8 l4proto;
+ u8 dir;
+
+ int oifidx;
+
+ struct dst_entry *dst_cache;
+};
+
+struct flow_offload_tuple_rhash {
+ struct rhash_head node;
+ struct flow_offload_tuple tuple;
+};
+
+#define FLOW_OFFLOAD_SNAT 0x1
+#define FLOW_OFFLOAD_DNAT 0x2
+#define FLOW_OFFLOAD_DYING 0x4
+
+struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+ u32 flags;
+ union {
+ /* Your private driver data here. */
+ u32 timeout;
+ };
+};
+
+#define NF_FLOW_TIMEOUT (30 * HZ)
+
+struct nf_flow_route {
+ struct {
+ struct dst_entry *dst;
+ int ifindex;
+ } tuple[FLOW_OFFLOAD_DIR_MAX];
+};
+
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
+ struct nf_flow_route *route);
+void flow_offload_free(struct flow_offload *flow);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
+struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple);
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+void nf_flow_offload_work_gc(struct work_struct *work);
+extern const struct rhashtable_params nf_flow_offload_rhash_params;
+
+void flow_offload_dead(struct flow_offload *flow);
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+
+struct flow_ports {
+ __be16 source, dest;
+};
+
+unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+
+#define MODULE_ALIAS_NF_FLOWTABLE(family) \
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
+
+#endif /* _FLOW_OFFLOAD_H */
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 814058d0f167..a50a69f5334c 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -25,7 +25,7 @@ struct nf_queue_entry {
struct nf_queue_handler {
int (*outfn)(struct nf_queue_entry *entry,
unsigned int queuenum);
- unsigned int (*nf_hook_drop)(struct net *net);
+ void (*nf_hook_drop)(struct net *net);
};
void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index fecc6112c768..663b015dace5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -9,6 +9,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
#define NFT_JUMP_STACK_SIZE 16
@@ -54,8 +55,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
pkt->xt.state = state;
}
-static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
pkt->tprot_set = false;
pkt->tprot = 0;
@@ -63,14 +64,6 @@ static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
pkt->xt.fragoff = 0;
}
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- nft_set_pktinfo(pkt, skb, state);
- nft_set_pktinfo_proto_unspec(pkt, skb);
-}
-
/**
* struct nft_verdict - nf_tables verdict
*
@@ -150,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data)
* struct nft_ctx - nf_tables rule/set context
*
* @net: net namespace
- * @afi: address family info
* @table: the table the chain is contained in
* @chain: the chain the rule is contained in
* @nla: netlink attributes
* @portid: netlink portID of the original message
* @seq: netlink sequence number
+ * @family: protocol family
* @report: notify via unicast netlink message
*/
struct nft_ctx {
struct net *net;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
const struct nlattr * const *nla;
u32 portid;
u32 seq;
+ u8 family;
bool report;
};
@@ -381,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type);
* @list: table set list node
* @bindings: list of set bindings
* @name: name of the set
+ * @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
* @objtype: object type (see NFT_OBJECT_* definitions)
@@ -403,6 +397,7 @@ struct nft_set {
struct list_head list;
struct list_head bindings;
char *name;
+ u64 handle;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -424,6 +419,11 @@ struct nft_set {
__attribute__((aligned(__alignof__(u64))));
};
+static inline bool nft_set_is_anonymous(const struct nft_set *set)
+{
+ return set->flags & NFT_SET_ANONYMOUS;
+}
+
static inline void *nft_set_priv(const struct nft_set *set)
{
return (void *)set->data;
@@ -883,7 +883,7 @@ enum nft_chain_type {
* @family: address family
* @owner: module owner
* @hook_mask: mask of valid hooks
- * @hooks: hookfn overrides
+ * @hooks: array of hook functions
*/
struct nf_chain_type {
const char *name;
@@ -905,8 +905,6 @@ struct nft_stats {
struct u64_stats_sync syncp;
};
-#define NFT_HOOK_OPS_MAX 2
-
/**
* struct nft_base_chain - nf_tables base chain
*
@@ -918,7 +916,7 @@ struct nft_stats {
* @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
- struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
+ struct nf_hook_ops ops;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
@@ -948,10 +946,13 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @chains: chains in the table
* @sets: sets in the table
* @objects: stateful objects in the table
+ * @flowtables: flow tables in the table
* @hgenerator: handle generator state
+ * @handle: table handle
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
+ * @afinfo: address family info
* @name: name of the table
*/
struct nft_table {
@@ -959,46 +960,16 @@ struct nft_table {
struct list_head chains;
struct list_head sets;
struct list_head objects;
+ struct list_head flowtables;
u64 hgenerator;
+ u64 handle;
u32 use;
- u16 flags:14,
+ u16 family:6,
+ flags:8,
genmask:2;
char *name;
};
-enum nft_af_flags {
- NFT_AF_NEEDS_DEV = (1 << 0),
-};
-
-/**
- * struct nft_af_info - nf_tables address family info
- *
- * @list: used internally
- * @family: address family
- * @nhooks: number of hooks in this family
- * @owner: module owner
- * @tables: used internally
- * @flags: family flags
- * @nops: number of hook ops in this family
- * @hook_ops_init: initialization function for chain hook ops
- * @hooks: hookfn overrides for packet validation
- */
-struct nft_af_info {
- struct list_head list;
- int family;
- unsigned int nhooks;
- struct module *owner;
- struct list_head tables;
- u32 flags;
- unsigned int nops;
- void (*hook_ops_init)(struct nf_hook_ops *,
- unsigned int);
- nf_hookfn *hooks[NF_MAX_HOOKS];
-};
-
-int nft_register_afinfo(struct net *, struct nft_af_info *);
-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
-
int nft_register_chain_type(const struct nf_chain_type *);
void nft_unregister_chain_type(const struct nf_chain_type *);
@@ -1016,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
* @name: name of this stateful object
* @genmask: generation mask
* @use: number of references to this stateful object
- * @data: object data, layout depends on type
+ * @handle: unique object handle
* @ops: object operations
- * @data: pointer to object data
+ * @data: object data, layout depends on type
*/
struct nft_object {
struct list_head list;
@@ -1026,6 +997,7 @@ struct nft_object {
struct nft_table *table;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
@@ -1097,6 +1069,46 @@ int nft_register_obj(struct nft_object_type *obj_type);
void nft_unregister_obj(struct nft_object_type *obj_type);
/**
+ * struct nft_flowtable - nf_tables flow table
+ *
+ * @list: flow table list node in table list
+ * @table: the table the flow table is contained in
+ * @name: name of this flow table
+ * @hooknum: hook number
+ * @priority: hook priority
+ * @ops_len: number of hooks in array
+ * @genmask: generation mask
+ * @use: number of references to this flow table
+ * @handle: unique object handle
+ * @data: rhashtable and garbage collector
+ * @ops: array of hooks
+ */
+struct nft_flowtable {
+ struct list_head list;
+ struct nft_table *table;
+ char *name;
+ int hooknum;
+ int priority;
+ int ops_len;
+ u32 genmask:2,
+ use:30;
+ u64 handle;
+ /* runtime data below here */
+ struct nf_hook_ops *ops ____cacheline_aligned;
+ struct nf_flowtable data;
+};
+
+struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask);
+void nft_flow_table_iterate(struct net *net,
+ void (*iter)(struct nf_flowtable *flowtable, void *data),
+ void *data);
+
+void nft_register_flowtable_type(struct nf_flowtable_type *type);
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
+
+/**
* struct nft_traceinfo - nft tracing information and state
*
* @pkt: pktinfo currently processed
@@ -1125,12 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
void nft_trace_notify(struct nft_traceinfo *info);
-#define nft_dereference(p) \
- nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
-
-#define MODULE_ALIAS_NFT_FAMILY(family) \
- MODULE_ALIAS("nft-afinfo-" __stringify(family))
-
#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
@@ -1332,4 +1338,11 @@ struct nft_trans_obj {
#define nft_trans_obj(trans) \
(((struct nft_trans_obj *)trans->data)->obj)
+struct nft_trans_flowtable {
+ struct nft_flowtable *flowtable;
+};
+
+#define nft_trans_flowtable(trans) \
+ (((struct nft_trans_flowtable *)trans->data)->flowtable)
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index f0896ba456c4..ed7b511f0a59 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -5,15 +5,11 @@
#include <net/netfilter/nf_tables.h>
#include <net/ip.h>
-static inline void
-nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *ip;
- nft_set_pktinfo(pkt, skb, state);
-
ip = ip_hdr(pkt->skb);
pkt->tprot_set = true;
pkt->tprot = ip->protocol;
@@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
-static inline int
-__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *iph, _iph;
u32 len, thoff;
@@ -52,16 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
return 0;
}
-static inline void
-nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv4;
-
#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index b8065b72f56e..dabe6fdb553a 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -5,20 +5,16 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/ipv6.h>
-static inline void
-nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
unsigned int flags = IP6_FH_F_AUTH;
int protohdr, thoff = 0;
unsigned short frag_off;
- nft_set_pktinfo(pkt, skb, state);
-
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0) {
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ nft_set_pktinfo_unspec(pkt, skb);
return;
}
@@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
pkt->xt.fragoff = frag_off;
}
-static inline int
-__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int flags = IP6_FH_F_AUTH;
@@ -68,16 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
#endif
}
-static inline void
-nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv6;
-
#endif
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index ecf238b8862c..ca9bd9fba5b5 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -8,7 +8,7 @@
#include <linux/spinlock.h>
-struct dev_rcv_lists;
+struct can_dev_rcv_lists;
struct s_stats;
struct s_pstats;
@@ -28,7 +28,7 @@ struct netns_can {
#endif
/* receive filters subscribed for 'all' CAN devices */
- struct dev_rcv_lists *can_rx_alldev_list;
+ struct can_dev_rcv_lists *can_rx_alldev_list;
spinlock_t can_rcvlists_lock;
struct timer_list can_stattimer;/* timer for statistics update */
struct s_stats *can_stats; /* packet statistics */
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 0ad4d0c71228..36c2d998a43c 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -11,7 +11,10 @@ struct netns_core {
int sysctl_somaxconn;
- struct prot_inuse __percpu *inuse;
+#ifdef CONFIG_PROC_FS
+ int __percpu *sock_inuse;
+ struct prot_inuse __percpu *prot_inuse;
+#endif
};
#endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cc00af2ac2d7..ca043342c0eb 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -17,7 +17,17 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
+#endif
+#if IS_ENABLED(CONFIG_DECNET)
+ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
+#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
bool defrag_ipv4;
#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 4109b5f3010f..48134353411d 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,14 +7,8 @@
struct nft_af_info;
struct netns_nftables {
- struct list_head af_info;
+ struct list_head tables;
struct list_head commit_list;
- struct nft_af_info *ipv4;
- struct nft_af_info *ipv6;
- struct nft_af_info *inet;
- struct nft_af_info *arp;
- struct nft_af_info *bridge;
- struct nft_af_info *netdev;
unsigned int base_seq;
u8 gencursor;
};
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index ebc813277662..0db7fb3e4e15 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -122,9 +122,12 @@ struct netns_sctp {
/* Flag to indicate if PR-CONFIG is enabled. */
int reconf_enable;
- /* Flag to idicate if SCTP-AUTH is enabled */
+ /* Flag to indicate if SCTP-AUTH is enabled */
int auth_enable;
+ /* Flag to indicate if stream interleave is enabled */
+ int intl_enable;
+
/*
* Policy to control SCTP IPv4 address scoping
* 0 - Disable IPv4 address scoping
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 753ac9361154..87406252f0a3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -29,6 +29,7 @@ struct tcf_block_ext_info {
enum tcf_block_binder_type binder_type;
tcf_chain_head_change_t *chain_head_change;
void *chain_head_change_priv;
+ u32 block_index;
};
struct tcf_block_cb;
@@ -38,16 +39,25 @@ bool tcf_queue_work(struct work_struct *work);
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create);
void tcf_chain_put(struct tcf_chain *chain);
+void tcf_block_netif_keep_dst(struct tcf_block *block);
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q);
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct netlink_ext_ack *extack);
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
- struct tcf_block_ext_info *ei);
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack);
void tcf_block_put(struct tcf_block *block);
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei);
+static inline bool tcf_block_shared(struct tcf_block *block)
+{
+ return block->index;
+}
+
static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
{
+ WARN_ON(tcf_block_shared(block));
return block->q;
}
@@ -77,14 +87,16 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
#else
static inline
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
{
return 0;
}
static inline
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -364,7 +376,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr);
+ struct tcf_exts *exts, bool ovr,
+ struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
@@ -544,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb,
#include <net/net_namespace.h>
static inline int
-tcf_change_indev(struct net *net, struct nlattr *indev_tlv)
+tcf_change_indev(struct net *net, struct nlattr *indev_tlv,
+ struct netlink_ext_ack *extack)
{
char indev[IFNAMSIZ];
struct net_device *dev;
- if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ)
+ if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "Interface name too long");
return -EINVAL;
+ }
dev = __dev_get_by_name(net, indev);
if (!dev)
return -ENODEV;
@@ -586,17 +602,9 @@ struct tc_cls_common_offload {
u32 chain_index;
__be16 protocol;
u32 prio;
+ struct netlink_ext_ack *extack;
};
-static inline void
-tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
- const struct tcf_proto *tp)
-{
- cls_common->chain_index = tp->chain->index;
- cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
-}
-
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tc_u32_sel *sel;
@@ -637,6 +645,31 @@ static inline bool tc_can_offload(const struct net_device *dev)
return dev->features & NETIF_F_HW_TC;
}
+static inline bool tc_can_offload_extack(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ bool can = tc_can_offload(dev);
+
+ if (!can)
+ NL_SET_ERR_MSG(extack, "TC offload is disabled on net device");
+
+ return can;
+}
+
+static inline bool
+tc_cls_can_offload_and_chain0(const struct net_device *dev,
+ struct tc_cls_common_offload *common)
+{
+ if (!tc_can_offload_extack(dev, common->extack))
+ return false;
+ if (common->chain_index) {
+ NL_SET_ERR_MSG(common->extack,
+ "Driver supports only offload of chain 0");
+ return false;
+ }
+ return true;
+}
+
static inline bool tc_skip_hw(u32 flags)
{
return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
@@ -664,6 +697,18 @@ static inline bool tc_in_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false;
}
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ if (tc_skip_sw(flags))
+ cls_common->extack = extack;
+}
+
enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
@@ -706,7 +751,6 @@ struct tc_cls_bpf_offload {
struct bpf_prog *oldprog;
const char *name;
bool exts_integrated;
- u32 gen_flags;
};
struct tc_mqprio_qopt_offload {
@@ -727,6 +771,11 @@ struct tc_cookie {
u32 len;
};
+struct tc_qopt_offload_stats {
+ struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_queue *qstats;
+};
+
enum tc_red_command {
TC_RED_REPLACE,
TC_RED_DESTROY,
@@ -739,9 +788,6 @@ struct tc_red_qopt_offload_params {
u32 max;
u32 probability;
bool is_ecn;
-};
-struct tc_red_qopt_offload_stats {
- struct gnet_stats_basic_packed *bstats;
struct gnet_stats_queue *qstats;
};
@@ -751,9 +797,34 @@ struct tc_red_qopt_offload {
u32 parent;
union {
struct tc_red_qopt_offload_params set;
- struct tc_red_qopt_offload_stats stats;
+ struct tc_qopt_offload_stats stats;
struct red_stats *xstats;
};
};
+enum tc_prio_command {
+ TC_PRIO_REPLACE,
+ TC_PRIO_DESTROY,
+ TC_PRIO_STATS,
+};
+
+struct tc_prio_qopt_offload_params {
+ int bands;
+ u8 priomap[TC_PRIO_MAX + 1];
+ /* In case that a prio qdisc is offloaded and now is changed to a
+ * non-offloadedable config, it needs to update the backlog & qlen
+ * values to negate the HW backlog & qlen values (and only them).
+ */
+ struct gnet_stats_queue *qstats;
+};
+
+struct tc_prio_qopt_offload {
+ enum tc_prio_command command;
+ u32 handle;
+ u32 parent;
+ union {
+ struct tc_prio_qopt_offload_params replace_params;
+ struct tc_qopt_offload_stats stats;
+ };
+};
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d1f413f06c72..815b92a23936 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -89,7 +89,8 @@ extern struct Qdisc_ops pfifo_head_drop_qdisc_ops;
int fifo_set_limit(struct Qdisc *q, unsigned int limit);
struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
- unsigned int limit);
+ unsigned int limit,
+ struct netlink_ext_ack *extack);
int register_qdisc(struct Qdisc_ops *qops);
int unregister_qdisc(struct Qdisc_ops *qops);
@@ -99,22 +100,24 @@ int qdisc_set_default(const char *id);
void qdisc_hash_add(struct Qdisc *q, bool invisible);
void qdisc_hash_del(struct Qdisc *q);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
-struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
- struct nlattr *tab);
+ struct nlattr *tab,
+ struct netlink_ext_ack *extack);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
void qdisc_put_stab(struct qdisc_size_table *tab);
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
-int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
- struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock, bool validate);
+bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+ struct net_device *dev, struct netdev_queue *txq,
+ spinlock_t *root_lock, bool validate);
void __qdisc_run(struct Qdisc *q);
static inline void qdisc_run(struct Qdisc *q)
{
- if (qdisc_run_begin(q))
+ if (qdisc_run_begin(q)) {
__qdisc_run(q);
+ qdisc_run_end(q);
+ }
}
static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
diff --git a/include/net/route.h b/include/net/route.h
index d538e6db1afe..1eb9ce470e25 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr);
void ip_rt_multicast_event(struct in_device *);
-int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
+int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index ead018744ff5..14b6b3af8918 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,10 +13,10 @@ enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = 1,
};
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
+int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+ rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_unregister(int protocol, int msgtype);
void rtnl_unregister_all(int protocol);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index becf86aa4ac6..eac43e8ca96d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -71,6 +71,7 @@ struct Qdisc {
* qdisc_tree_decrease_qlen() should stop.
*/
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
+#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
u32 limit;
const struct Qdisc_ops *ops;
@@ -88,14 +89,14 @@ struct Qdisc {
/*
* For performance sake on SMP, we put highly modified fields at the end
*/
- struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
struct gnet_stats_basic_packed bstats;
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
- struct sk_buff *skb_bad_txq;
+ struct sk_buff_head skb_bad_txq;
int padded;
refcount_t refcnt;
@@ -150,19 +151,23 @@ struct Qdisc_class_ops {
/* Child qdisc manipulation */
struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *);
int (*graft)(struct Qdisc *, unsigned long cl,
- struct Qdisc *, struct Qdisc **);
+ struct Qdisc *, struct Qdisc **,
+ struct netlink_ext_ack *extack);
struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);
void (*qlen_notify)(struct Qdisc *, unsigned long);
/* Class manipulation routines */
unsigned long (*find)(struct Qdisc *, u32 classid);
int (*change)(struct Qdisc *, u32, u32,
- struct nlattr **, unsigned long *);
+ struct nlattr **, unsigned long *,
+ struct netlink_ext_ack *);
int (*delete)(struct Qdisc *, unsigned long);
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
- struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long);
+ struct tcf_block * (*tcf_block)(struct Qdisc *sch,
+ unsigned long arg,
+ struct netlink_ext_ack *extack);
unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
u32 classid);
void (*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -187,15 +192,25 @@ struct Qdisc_ops {
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
- int (*init)(struct Qdisc *, struct nlattr *arg);
+ int (*init)(struct Qdisc *sch, struct nlattr *arg,
+ struct netlink_ext_ack *extack);
void (*reset)(struct Qdisc *);
void (*destroy)(struct Qdisc *);
- int (*change)(struct Qdisc *, struct nlattr *arg);
- void (*attach)(struct Qdisc *);
+ int (*change)(struct Qdisc *sch,
+ struct nlattr *arg,
+ struct netlink_ext_ack *extack);
+ void (*attach)(struct Qdisc *sch);
int (*dump)(struct Qdisc *, struct sk_buff *);
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
+ void (*ingress_block_set)(struct Qdisc *sch,
+ u32 block_index);
+ void (*egress_block_set)(struct Qdisc *sch,
+ u32 block_index);
+ u32 (*ingress_block_get)(struct Qdisc *sch);
+ u32 (*egress_block_get)(struct Qdisc *sch);
+
struct module *owner;
};
@@ -218,14 +233,18 @@ struct tcf_proto_ops {
const struct tcf_proto *,
struct tcf_result *);
int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack);
void* (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool);
- int (*delete)(struct tcf_proto*, void *, bool*);
+ void **, bool,
+ struct netlink_ext_ack *);
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last,
+ struct netlink_ext_ack *);
void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
void (*bind_class)(void *, u32, unsigned long);
@@ -247,8 +266,6 @@ struct tcf_proto {
/* All the rest */
u32 prio;
- u32 classid;
- struct Qdisc *q;
void *data;
const struct tcf_proto_ops *ops;
struct tcf_chain *chain;
@@ -267,8 +284,7 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
struct tcf_chain {
struct tcf_proto __rcu *filter_chain;
- tcf_chain_head_change_t *chain_head_change;
- void *chain_head_change_priv;
+ struct list_head filter_chain_list;
struct list_head list;
struct tcf_block *block;
u32 index; /* chain index */
@@ -277,12 +293,33 @@ struct tcf_chain {
struct tcf_block {
struct list_head chain_list;
+ u32 index; /* block index for shared blocks */
+ unsigned int refcnt;
struct net *net;
struct Qdisc *q;
struct list_head cb_list;
- struct work_struct work;
+ struct list_head owner_list;
+ bool keep_dst;
+ unsigned int offloadcnt; /* Number of oddloaded filters */
+ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
};
+static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
+{
+ if (*flags & TCA_CLS_FLAGS_IN_HW)
+ return;
+ *flags |= TCA_CLS_FLAGS_IN_HW;
+ block->offloadcnt++;
+}
+
+static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+ if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+ return;
+ *flags &= ~TCA_CLS_FLAGS_IN_HW;
+ block->offloadcnt--;
+}
+
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
struct qdisc_skb_cb *qcb;
@@ -291,11 +328,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
+{
+ __u32 qlen = 0;
+ int i;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
+ qlen = q->q.qlen;
+ }
+
+ return qlen;
+}
+
static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
return (struct qdisc_skb_cb *)skb->cb;
@@ -444,10 +501,12 @@ void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
unsigned int len);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- const struct Qdisc_ops *ops);
+ const struct Qdisc_ops *ops,
+ struct netlink_ext_ack *extack);
void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
- const struct Qdisc_ops *ops, u32 parentid);
+ const struct Qdisc_ops *ops, u32 parentid,
+ struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
@@ -633,12 +692,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
sch->qstats.backlog -= qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
const struct sk_buff *skb)
{
sch->qstats.backlog += qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
+{
+ this_cpu_dec(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->requeues);
+}
+
static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
sch->qstats.drops += count;
@@ -769,26 +855,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
+
/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
- if (!sch->gso_skb) {
- sch->gso_skb = sch->dequeue(sch);
- if (sch->gso_skb) {
+ if (!skb) {
+ skb = sch->dequeue(sch);
+
+ if (skb) {
+ __skb_queue_head(&sch->gso_skb, skb);
/* it's still part of the queue */
- qdisc_qstats_backlog_inc(sch, sch->gso_skb);
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
}
}
- return sch->gso_skb;
+ return skb;
}
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
- struct sk_buff *skb = sch->gso_skb;
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
if (skb) {
- sch->gso_skb = NULL;
+ skb = __skb_dequeue(&sch->gso_skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
} else {
@@ -846,6 +936,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_drop(sch);
}
+static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ __qdisc_drop(skb, to_free);
+ qdisc_qstats_cpu_drop(sch);
+
+ return NET_XMIT_DROP;
+}
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index deaafa9b09cb..20ff237c5eb2 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other)
SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive)
-#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
+#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \
+ a->chunk_hdr->type == SCTP_CID_I_DATA)
/* Calculate the actual data size in a data chunk */
-#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
- - (unsigned long)(c->chunk_hdr)\
- - sizeof(struct sctp_data_chunk)))
+#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \
+ (unsigned long)(c->chunk_hdr) - \
+ sctp_datachk_len(&c->asoc->stream)))
/* Internal error codes */
enum sctp_ierror {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 906a9c0efa71..20c0c1be2ca7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
-int sctp_transport_walk_start(struct rhashtable_iter *iter);
+void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
struct rhashtable_iter *iter);
@@ -444,13 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
int frag = pmtu;
frag -= sp->pf->af->net_header_len;
- frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
+ frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
if (asoc->user_frag)
frag = min_t(int, frag, asoc->user_frag);
frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- sizeof(struct sctp_data_chunk)));
+ sctp_datachk_len(&asoc->stream)));
return frag;
}
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 70fb397f65b0..2883c43c5258 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
const __u32 lowest_tsn,
const struct sctp_chunk *chunk);
-struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
+ __u8 flags, int paylen, gfp_t gfp);
+struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
+ __u32 new_cum_tsn, size_t nstreams,
+ struct sctp_ifwdtsn_skip *skiplist);
+struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
const struct sctp_sndrcvinfo *sinfo,
- int len, const __u8 flags,
- __u16 ssn, gfp_t gfp);
+ int len, __u8 flags, gfp_t gfp);
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
const __u32 lowest_tsn);
struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
@@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
__u16 size;
size = ntohs(chunk->chunk_hdr->length);
- size -= sizeof(struct sctp_data_chunk);
+ size -= sctp_datahdr_len(&chunk->asoc->stream);
return size;
}
@@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
typecheck(__u32, b) && \
((__s32)((a) - (b)) <= 0))
+/* Compare two MIDs */
+#define MID_lt(a, b) \
+ (typecheck(__u32, a) && \
+ typecheck(__u32, b) && \
+ ((__s32)((a) - (b)) < 0))
+
/* Compare two SSNs */
#define SSN_lt(a,b) \
(typecheck(__u16, a) && \
diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h
new file mode 100644
index 000000000000..6657711c8bc4
--- /dev/null
+++ b/include/net/sctp/stream_interleave.h
@@ -0,0 +1,61 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * These are definitions used by the stream schedulers, defined in RFC
+ * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11)
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresses:
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#ifndef __sctp_stream_interleave_h__
+#define __sctp_stream_interleave_h__
+
+struct sctp_stream_interleave {
+ __u16 data_chunk_len;
+ __u16 ftsn_chunk_len;
+ /* (I-)DATA process */
+ struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, __u8 flags, gfp_t gfp);
+ void (*assign_number)(struct sctp_chunk *chunk);
+ bool (*validate_data)(struct sctp_chunk *chunk);
+ int (*ulpevent_data)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ int (*enqueue_event)(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event);
+ void (*renege_events)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ /* (I-)FORWARD-TSN process */
+ void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn);
+ bool (*validate_ftsn)(struct sctp_chunk *chunk);
+ void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn);
+ void (*handle_ftsn)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk);
+};
+
+void sctp_stream_interleave_init(struct sctp_stream *stream);
+
+#endif /* __sctp_stream_interleave_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 9a5ccf03a59b..02369e379d35 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -89,6 +89,7 @@ struct sctp_stream;
#include <net/sctp/tsnmap.h>
#include <net/sctp/ulpevent.h>
#include <net/sctp/ulpqueue.h>
+#include <net/sctp/stream_interleave.h>
/* Structures useful for managing bind/connect. */
@@ -217,6 +218,7 @@ struct sctp_sock {
disable_fragments:1,
v4mapped:1,
frag_interleave:1,
+ strm_interleave:1,
recvrcvinfo:1,
recvnxtinfo:1,
data_ready_signalled:1;
@@ -397,6 +399,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
#define sctp_ssn_skip(stream, type, sid, ssn) \
((stream)->type[sid].ssn = ssn + 1)
+/* What is the current MID number for this stream? */
+#define sctp_mid_peek(stream, type, sid) \
+ ((stream)->type[sid].mid)
+
+/* Return the next MID number for this stream. */
+#define sctp_mid_next(stream, type, sid) \
+ ((stream)->type[sid].mid++)
+
+/* Skip over this mid and all below. */
+#define sctp_mid_skip(stream, type, sid, mid) \
+ ((stream)->type[sid].mid = mid + 1)
+
+#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
+
+/* What is the current MID_uo number for this stream? */
+#define sctp_mid_uo_peek(stream, type, sid) \
+ ((stream)->type[sid].mid_uo)
+
+/* Return the next MID_uo number for this stream. */
+#define sctp_mid_uo_next(stream, type, sid) \
+ ((stream)->type[sid].mid_uo++)
+
/*
* Pointers to address related SCTP functions.
* (i.e. things that depend on the address family.)
@@ -574,6 +598,8 @@ struct sctp_chunk {
struct sctp_addiphdr *addip_hdr;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_authhdr *auth_hdr;
+ struct sctp_idatahdr *idata_hdr;
+ struct sctp_ifwdtsn_hdr *ifwdtsn_hdr;
} subh;
__u8 *chunk_end;
@@ -620,6 +646,7 @@ struct sctp_chunk {
__u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
has_tsn:1, /* Does this chunk have a TSN yet? */
has_ssn:1, /* Does this chunk have a SSN yet? */
+#define has_mid has_ssn
singleton:1, /* Only chunk in the packet? */
end_of_packet:1, /* Last chunk in the packet? */
ecn_ce_done:1, /* Have we processed the ECN CE bit? */
@@ -1073,6 +1100,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
void sctp_prsctp_prune(struct sctp_association *asoc,
struct sctp_sndrcvinfo *sinfo, int msg_len);
+void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
/* Uncork and flush an outqueue. */
static inline void sctp_outq_cork(struct sctp_outq *q)
{
@@ -1357,13 +1385,25 @@ struct sctp_stream_out_ext {
};
struct sctp_stream_out {
- __u16 ssn;
- __u8 state;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
struct sctp_stream_out_ext *ext;
+ __u8 state;
};
struct sctp_stream_in {
- __u16 ssn;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
+ __u32 fsn;
+ __u32 fsn_uo;
+ char pd_mode;
+ char pd_mode_uo;
};
struct sctp_stream {
@@ -1387,11 +1427,32 @@ struct sctp_stream {
struct sctp_stream_out_ext *rr_next;
};
};
+ struct sctp_stream_interleave *si;
};
#define SCTP_STREAM_CLOSED 0x00
#define SCTP_STREAM_OPEN 0x01
+static inline __u16 sctp_datachk_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len;
+}
+
+static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
+static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len;
+}
+
+static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
/* SCTP_GET_ASSOC_STATS counters */
struct sctp_priv_assoc_stats {
/* Maximum observed rto in the association during subsequent
@@ -1940,6 +2001,7 @@ struct sctp_association {
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
temp:1, /* Is it a temporary association? */
force_delay:1,
+ intl_enable:1,
prsctp_enable:1,
reconf_enable:1;
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 231dc42f1da6..51b4e0626c34 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -45,19 +45,29 @@
/* A structure to carry information to the ULP (e.g. Sockets API) */
/* Warning: This sits inside an skb.cb[] area. Be very careful of
* growing this structure as it is at the maximum limit now.
+ *
+ * sctp_ulpevent is saved in sk->cb(48 bytes), whose last 4 bytes
+ * have been taken by sock_skb_cb, So here it has to use 'packed'
+ * to make sctp_ulpevent fit into the rest 44 bytes.
*/
struct sctp_ulpevent {
struct sctp_association *asoc;
struct sctp_chunk *chunk;
unsigned int rmem_len;
- __u32 ppid;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ union {
+ __u32 ppid;
+ __u32 fsn;
+ };
__u32 tsn;
__u32 cumtsn;
__u16 stream;
- __u16 ssn;
__u16 flags;
__u16 msg_flags;
-};
+} __packed;
/* Retrieve the skb this event sits inside of. */
static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev)
@@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
const struct sctp_association *asoc,
- __u32 indication, gfp_t gfp);
+ __u32 indication, __u32 sid, __u32 seq,
+ __u32 flags, gfp_t gfp);
struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication(
const struct sctp_association *asoc, gfp_t gfp);
@@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
const struct sctp_association *asoc, __u16 flags,
__u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp);
+struct sctp_ulpevent *sctp_make_reassembled_event(
+ struct net *net, struct sk_buff_head *queue,
+ struct sk_buff *f_frag, struct sk_buff *l_frag);
+
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *);
void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index e0dce07b8794..bb0ecba3db2b 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -45,6 +45,7 @@ struct sctp_ulpq {
char pd_mode;
struct sctp_association *asoc;
struct sk_buff_head reasm;
+ struct sk_buff_head reasm_uo;
struct sk_buff_head lobby;
};
@@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc);
void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn);
void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32);
-#endif /* __sctp_ulpqueue_h__ */
-
-
-
-
-
+__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
+ struct sk_buff_head *list, __u16 needed);
+#endif /* __sctp_ulpqueue_h__ */
diff --git a/include/net/sock.h b/include/net/sock.h
index 7a7b14e9628a..73b7830b0bb8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -72,6 +72,7 @@
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
#include <net/smc.h>
+#include <net/l3mdev.h>
/*
* This structure really needs to be cleaned up.
@@ -1262,6 +1263,7 @@ proto_memory_pressure(struct proto *prot)
/* Called with local bh disabled */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
int sock_prot_inuse_get(struct net *net, struct proto *proto);
+int sock_inuse_get(struct net *net);
#else
static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
int inc)
@@ -2337,31 +2339,6 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
-/**
- * sk_state_load - read sk->sk_state for lockless contexts
- * @sk: socket pointer
- *
- * Paired with sk_state_store(). Used in places we do not hold socket lock :
- * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
- */
-static inline int sk_state_load(const struct sock *sk)
-{
- return smp_load_acquire(&sk->sk_state);
-}
-
-/**
- * sk_state_store - update sk->sk_state
- * @sk: socket pointer
- * @newstate: new state
- *
- * Paired with sk_state_load(). Should be used in contexts where
- * state change might impact lockless readers.
- */
-static inline void sk_state_store(struct sock *sk, int newstate)
-{
- smp_store_release(&sk->sk_state, newstate);
-}
-
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
@@ -2412,4 +2389,34 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
return *proto->sysctl_rmem;
}
+/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+ * Some wifi drivers need to tweak it to get more chunks.
+ * They can use this helper from their ndo_start_xmit()
+ */
+static inline void sk_pacing_shift_update(struct sock *sk, int val)
+{
+ if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ return;
+ sk->sk_pacing_shift = val;
+}
+
+/* if a socket is bound to a device, check that the given device
+ * index is either the same or that the socket is bound to an L3
+ * master device and the given device index is also enslaved to
+ * that L3 master
+ */
+static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
+{
+ int mdif;
+
+ if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
+ return true;
+
+ mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
+ if (mdif && mdif == sk->sk_bound_dev_if)
+ return true;
+
+ return false;
+}
+
#endif /* _SOCK_H */
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 781f3433a0be..9470fd7e4350 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -6,10 +6,16 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_csum.h>
+struct tcf_csum_params {
+ int action;
+ u32 update_flags;
+ struct rcu_head rcu;
+};
+
struct tcf_csum {
struct tc_action common;
- u32 update_flags;
+ struct tcf_csum_params __rcu *params;
};
#define to_tcf_csum(a) ((struct tcf_csum *)a)
@@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a)
static inline u32 tcf_csum_update_flags(const struct tc_action *a)
{
- return to_tcf_csum(a)->update_flags;
+ u32 update_flags;
+
+ rcu_read_lock();
+ update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags;
+ rcu_read_unlock();
+
+ return update_flags;
}
#endif /* __NET_TC_CSUM_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 21d253c9a8c6..a2e9cbca5c9e 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,10 +8,8 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
- int tcfm_ifindex;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
- struct net *net;
struct list_head tcfm_list;
};
#define to_mirred(a) ((struct tcf_mirred *)a)
@@ -34,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
return false;
}
-static inline int tcf_mirred_ifindex(const struct tc_action *a)
+static inline struct net_device *tcf_mirred_dev(const struct tc_action *a)
{
- return to_mirred(a)->tcfm_ifindex;
+ return rtnl_dereference(to_mirred(a)->tcfm_dev);
}
#endif /* __NET_TC_MIR_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6da880d2f022..093e967a2960 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -953,6 +953,7 @@ struct rate_sample {
u32 prior_in_flight; /* in flight before this ACK */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
+ bool is_ack_delayed; /* is this (likely) a delayed ACK? */
};
struct tcp_congestion_ops {
@@ -1507,8 +1508,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie, int *syn_loss,
- unsigned long *last_syn_loss);
+ struct tcp_fastopen_cookie *cookie);
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie, bool syn_lost,
u16 try_exp);
@@ -1546,7 +1546,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
-void tcp_fastopen_active_timeout_reset(void);
+void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
@@ -2006,17 +2006,21 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;
- if (sk_fullsock(sk))
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+ if (sk_fullsock(sk)) {
+ sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
+ }
- memset(&sock_ops, 0, sizeof(sock_ops));
sock_ops.sk = sk;
sock_ops.op = op;
+ if (nargs > 0)
+ memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2025,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ u32 args[2] = {arg1, arg2};
+
+ return tcp_call_bpf(sk, op, 2, args);
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ u32 args[3] = {arg1, arg2, arg3};
+
+ return tcp_call_bpf(sk, op, 3, args);
+}
+
#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ return -EPERM;
+}
+
#endif
static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2047,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0)
rwnd = 0;
@@ -2056,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
- return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
#if IS_ENABLED(CONFIG_SMC)
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index f96391e84a8a..ad73d8b3fcc2 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -301,7 +301,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
l4_hdr = ipv6_hdr(skb)->nexthdr;
break;
default:
- return features;;
+ return features;
}
if ((l4_hdr == IPPROTO_UDP) &&
diff --git a/include/net/wext.h b/include/net/wext.h
index e51f067fdb3a..aa192a670304 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -7,7 +7,7 @@
struct net;
#ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg);
int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
unsigned long arg);
@@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
struct iw_statistics *get_wireless_stats(struct net_device *dev);
int call_commit_handler(struct net_device *dev);
#else
-static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg)
{
return -EINVAL;
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 000000000000..b2362ddfa694
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,48 @@
+/* include/net/xdp.h
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#ifndef __LINUX_NET_XDP_H__
+#define __LINUX_NET_XDP_H__
+
+/**
+ * DOC: XDP RX-queue information
+ *
+ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
+ * level RX-ring queues. It is information that is specific to how
+ * the driver have configured a given RX-ring queue.
+ *
+ * Each xdp_buff frame received in the driver carry a (pointer)
+ * reference to this xdp_rxq_info structure. This provides the XDP
+ * data-path read-access to RX-info for both kernel and bpf-side
+ * (limited subset).
+ *
+ * For now, direct access is only safe while running in NAPI/softirq
+ * context. Contents is read-mostly and must not be updated during
+ * driver NAPI/softirq poll.
+ *
+ * The driver usage API is a register and unregister API.
+ *
+ * The struct is not directly tied to the XDP prog. A new XDP prog
+ * can be attached as long as it doesn't change the underlying
+ * RX-ring. If the RX-ring does change significantly, the NIC driver
+ * naturally need to stop the RX-ring before purging and reallocating
+ * memory. In that process the driver MUST call unregistor (which
+ * also apply for driver shutdown and unload). The register API is
+ * also mandatory during RX-ring setup.
+ */
+
+struct xdp_rxq_info {
+ struct net_device *dev;
+ u32 queue_index;
+ u32 reg_state;
+} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index);
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+
+#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ae35991b5877..7d2077665c0b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c
/* A struct encoding bundle of transformations to apply to some set of flow.
*
- * dst->child points to the next element of bundle.
+ * xdst->child points to the next element of bundle.
* dst->xfrm points to an instanse of transformer.
*
* Due to unfortunate limitations of current routing cache, which we
@@ -984,6 +984,8 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
+ struct dst_entry *child;
+ struct dst_entry *path;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
@@ -994,7 +996,35 @@ struct xfrm_dst {
u32 path_cookie;
};
+static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
+
+ return xdst->path;
+ }
+#endif
+ return (struct dst_entry *) dst;
+}
+
+static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ return xdst->child;
+ }
+#endif
+ return NULL;
+}
+
#ifdef CONFIG_XFRM
+static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child)
+{
+ xdst->child = child;
+}
+
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
@@ -1021,6 +1051,7 @@ struct xfrm_offload {
#define XFRM_GSO_SEGMENT 16
#define XFRM_GRO 32
#define XFRM_ESP_NO_TRAILER 64
+#define XFRM_DEV_RESUME 128
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1847,34 +1878,53 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
{
return skb->sp->xvec[skb->sp->len - 1];
}
+#endif
+
static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
{
+#ifdef CONFIG_XFRM
struct sec_path *sp = skb->sp;
if (!sp || !sp->olen || sp->len != sp->olen)
return NULL;
return &sp->ovec[sp->olen - 1];
-}
+#else
+ return NULL;
#endif
+}
void __net_init xfrm_dev_init(void);
#ifdef CONFIG_XFRM_OFFLOAD
-int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
+void xfrm_dev_resume(struct sk_buff *skb);
+void xfrm_dev_backlog(struct softnet_data *sd);
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+ struct xfrm_state_offload *xso = &x->xso;
+
+ if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+ xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
+ struct xfrm_dst *xdst;
if (!x || !x->type_offload)
return false;
- if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
- !dst->child->xfrm)
+ xdst = (struct xfrm_dst *) dst;
+ if (!x->xso.offload_handle && !xdst->child->xfrm)
+ return true;
+ if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+ !xdst->child->xfrm)
return true;
return false;
@@ -1894,15 +1944,24 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
struct net_device *dev = xso->dev;
if (dev && dev->xfrmdev_ops) {
- dev->xfrmdev_ops->xdo_dev_state_free(x);
+ if (dev->xfrmdev_ops->xdo_dev_state_free)
+ dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
dev_put(dev);
}
}
#else
-static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+static inline void xfrm_dev_resume(struct sk_buff *skb)
{
- return 0;
+}
+
+static inline void xfrm_dev_backlog(struct softnet_data *sd)
+{
+}
+
+static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
+{
+ return skb;
}
static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
@@ -1923,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
return false;
}
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
return false;