aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/netdevice.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--include/linux/netdevice.h297
1 files changed, 137 insertions, 160 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e8d79d4ebcfe..994f7423a74b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
#include <uapi/linux/netdevice.h>
#include <uapi/linux/if_bonding.h>
#include <uapi/linux/pkt_cls.h>
+#include <linux/hashtable.h>
struct netpoll_info;
struct device;
@@ -191,6 +192,7 @@ struct net_device_stats {
#ifdef CONFIG_RPS
#include <linux/static_key.h>
extern struct static_key rps_needed;
+extern struct static_key rfs_needed;
#endif
struct neighbour;
@@ -315,7 +317,6 @@ struct napi_struct {
unsigned int gro_count;
int (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
- spinlock_t poll_lock;
int poll_owner;
#endif
struct net_device *dev;
@@ -333,6 +334,16 @@ enum {
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+};
+
+enum {
+ NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED),
+ NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE),
+ NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC),
+ NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED),
+ NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+ NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
};
enum gro_result {
@@ -452,32 +463,22 @@ static inline bool napi_reschedule(struct napi_struct *napi)
return false;
}
-void __napi_complete(struct napi_struct *n);
-void napi_complete_done(struct napi_struct *n, int work_done);
+bool __napi_complete(struct napi_struct *n);
+bool napi_complete_done(struct napi_struct *n, int work_done);
/**
* napi_complete - NAPI processing complete
* @n: NAPI context
*
* Mark NAPI processing as complete.
* Consider using napi_complete_done() instead.
+ * Return false if device should avoid rearming interrupts.
*/
-static inline void napi_complete(struct napi_struct *n)
+static inline bool napi_complete(struct napi_struct *n)
{
return napi_complete_done(n, 0);
}
/**
- * napi_hash_add - add a NAPI to global hashtable
- * @napi: NAPI context
- *
- * Generate a new napi_id and store a @napi under it in napi_hash.
- * Used for busy polling (CONFIG_NET_RX_BUSY_POLL).
- * Note: This is normally automatically done from netif_napi_add(),
- * so might disappear in a future Linux version.
- */
-void napi_hash_add(struct napi_struct *napi);
-
-/**
* napi_hash_del - remove a NAPI from global table
* @napi: NAPI context
*
@@ -731,8 +732,8 @@ struct xps_dev_maps {
struct rcu_head rcu;
struct xps_map __rcu *cpu_map[0];
};
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \
- (nr_cpu_ids * sizeof(struct xps_map *)))
+#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
+ (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
#endif /* CONFIG_XPS */
#define TC_MAX_QUEUE 16
@@ -788,6 +789,7 @@ enum {
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
TC_SETUP_MATCHALL,
+ TC_SETUP_CLSBPF,
};
struct tc_cls_u32_offload;
@@ -799,7 +801,9 @@ struct tc_to_netdev {
struct tc_cls_u32_offload *cls_u32;
struct tc_cls_flower_offload *cls_flower;
struct tc_cls_matchall_offload *cls_mall;
+ struct tc_cls_bpf_offload *cls_bpf;
};
+ bool egress_dev;
};
/* These structures hold the attributes of xdp state that are being passed
@@ -923,6 +927,14 @@ struct netdev_xdp {
* 3. Update dev->stats asynchronously and atomically, and define
* neither operation.
*
+ * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
+ * Return true if this device supports offload stats of this attr_id.
+ *
+ * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
+ * void *attr_data)
+ * Get statistics for offload operations by attr_id. Write it into the
+ * attr_data pointer.
+ *
* int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
* If device supports VLAN filtering this function is called when a
* VLAN id is registered.
@@ -935,7 +947,8 @@ struct netdev_xdp {
*
* SR-IOV management functions.
* int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
- * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
+ * u8 qos, __be16 proto);
* int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
* int max_tx_rate);
* int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
@@ -1030,7 +1043,7 @@ struct netdev_xdp {
* Deletes the FDB entry from dev coresponding to addr.
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
* struct net_device *dev, struct net_device *filter_dev,
- * int idx)
+ * int *idx)
* Used to add FDB entries to dump requests. Implementers should add
* entries to skb and update idx with the number of entries.
*
@@ -1154,6 +1167,10 @@ struct net_device_ops {
struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
struct rtnl_link_stats64 *storage);
+ bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
+ int (*ndo_get_offload_stats)(int attr_id,
+ const struct net_device *dev,
+ void *attr_data);
struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
int (*ndo_vlan_rx_add_vid)(struct net_device *dev,
@@ -1172,7 +1189,8 @@ struct net_device_ops {
int (*ndo_set_vf_mac)(struct net_device *dev,
int queue, u8 *mac);
int (*ndo_set_vf_vlan)(struct net_device *dev,
- int queue, u16 vlan, u8 qos);
+ int queue, u16 vlan,
+ u8 qos, __be16 proto);
int (*ndo_set_vf_rate)(struct net_device *dev,
int vf, int min_tx_rate,
int max_tx_rate);
@@ -1262,7 +1280,7 @@ struct net_device_ops {
struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev,
- int idx);
+ int *idx);
int (*ndo_bridge_setlink)(struct net_device *dev,
struct nlmsghdr *nlh,
@@ -1439,7 +1457,6 @@ enum netdev_priv_flags {
* @ptype_specific: Device-specific, protocol-specific packet handlers
*
* @adj_list: Directly linked devices, like slaves for bonding
- * @all_adj_list: All linked devices, *including* neighbours
* @features: Currently active device features
* @hw_features: User-changeable features
*
@@ -1489,6 +1506,8 @@ enum netdev_priv_flags {
* @if_port: Selectable AUI, TP, ...
* @dma: DMA channel
* @mtu: Interface MTU value
+ * @min_mtu: Interface Minimum MTU value
+ * @max_mtu: Interface Maximum MTU value
* @type: Interface hardware type
* @hard_header_len: Maximum hardware header length.
*
@@ -1561,8 +1580,6 @@ enum netdev_priv_flags {
*
* @xps_maps: XXX: need comments on this one
*
- * @offload_fwd_mark: Offload device fwding mark
- *
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
* @watchdog_timer: List of timers
@@ -1604,7 +1621,7 @@ enum netdev_priv_flags {
* @dcbnl_ops: Data Center Bridging netlink ops
* @num_tc: Number of traffic classes in the net device
* @tc_to_txq: XXX: need comments on this one
- * @prio_tc_map XXX: need comments on this one
+ * @prio_tc_map: XXX: need comments on this one
*
* @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp
*
@@ -1658,11 +1675,6 @@ struct net_device {
struct list_head lower;
} adj_list;
- struct {
- struct list_head upper;
- struct list_head lower;
- } all_adj_list;
-
netdev_features_t features;
netdev_features_t hw_features;
netdev_features_t wanted_features;
@@ -1711,6 +1723,8 @@ struct net_device {
unsigned char dma;
unsigned int mtu;
+ unsigned int min_mtu;
+ unsigned int max_mtu;
unsigned short type;
unsigned short hard_header_len;
@@ -1784,7 +1798,7 @@ struct net_device {
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
- struct list_head nf_hooks_ingress;
+ struct nf_hook_entry __rcu *nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN];
@@ -1800,6 +1814,9 @@ struct net_device {
unsigned int num_tx_queues;
unsigned int real_num_tx_queues;
struct Qdisc *qdisc;
+#ifdef CONFIG_NET_SCHED
+ DECLARE_HASHTABLE (qdisc_hash, 4);
+#endif
unsigned long tx_queue_len;
spinlock_t tx_global_lock;
int watchdog_timeo;
@@ -1810,9 +1827,6 @@ struct net_device {
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto __rcu *egress_cl_list;
#endif
-#ifdef CONFIG_NET_SWITCHDEV
- u32 offload_fwd_mark;
-#endif
/* These may be needed for future network-power-down code. */
struct timer_list watchdog_timer;
@@ -1907,34 +1921,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
return 0;
}
-static inline
-void netdev_reset_tc(struct net_device *dev)
-{
- dev->num_tc = 0;
- memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
- memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
-}
-
-static inline
-int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
-{
- if (tc >= dev->num_tc)
- return -EINVAL;
-
- dev->tc_to_txq[tc].count = count;
- dev->tc_to_txq[tc].offset = offset;
- return 0;
-}
-
-static inline
-int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
-{
- if (num_tc > TC_MAX_QUEUE)
- return -EINVAL;
-
- dev->num_tc = num_tc;
- return 0;
-}
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq);
+void netdev_reset_tc(struct net_device *dev);
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
static inline
int netdev_get_num_tc(struct net_device *dev)
@@ -2154,7 +2144,10 @@ struct napi_gro_cb {
/* Used to determine if flush_id can be ignored */
u8 is_atomic:1;
- /* 5 bit hole */
+ /* Number of gro_receive callbacks this packet already went through */
+ u8 recursion_counter:4;
+
+ /* 1 bit hole */
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@@ -2165,6 +2158,40 @@ struct napi_gro_cb {
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
+#define GRO_RECURSION_LIMIT 15
+static inline int gro_recursion_inc_test(struct sk_buff *skb)
+{
+ return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
+}
+
+typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *);
+static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
+ struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ if (unlikely(gro_recursion_inc_test(skb))) {
+ NAPI_GRO_CB(skb)->flush |= 1;
+ return NULL;
+ }
+
+ return cb(head, skb);
+}
+
+typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **,
+ struct sk_buff *);
+static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb,
+ struct sock *sk,
+ struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ if (unlikely(gro_recursion_inc_test(skb))) {
+ NAPI_GRO_CB(skb)->flush |= 1;
+ return NULL;
+ }
+
+ return cb(sk, head, skb);
+}
+
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
struct net_device *dev; /* NULL is wildcarded here */
@@ -2634,71 +2661,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
remcsum_unadjust((__sum16 *)ptr, grc->delta);
}
-struct skb_csum_offl_spec {
- __u16 ipv4_okay:1,
- ipv6_okay:1,
- encap_okay:1,
- ip_options_okay:1,
- ext_hdrs_okay:1,
- tcp_okay:1,
- udp_okay:1,
- sctp_okay:1,
- vlan_okay:1,
- no_encapped_ipv6:1,
- no_not_encapped:1;
-};
-
-bool __skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help);
-
-static inline bool skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help)
-{
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return false;
-
- return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help);
-}
-
-static inline bool skb_csum_offload_chk_help(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec)
-{
- bool csum_encapped;
-
- return skb_csum_offload_chk(skb, spec, &csum_encapped, true);
-}
-
-static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb)
-{
- static const struct skb_csum_offl_spec csum_offl_spec = {
- .ipv4_okay = 1,
- .ip_options_okay = 1,
- .ipv6_okay = 1,
- .vlan_okay = 1,
- .tcp_okay = 1,
- .udp_okay = 1,
- };
-
- return skb_csum_offload_chk_help(skb, &csum_offl_spec);
-}
-
-static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb)
-{
- static const struct skb_csum_offl_spec csum_offl_spec = {
- .ipv4_okay = 1,
- .ip_options_okay = 1,
- .tcp_okay = 1,
- .udp_okay = 1,
- .vlan_okay = 1,
- };
-
- return skb_csum_offload_chk_help(skb, &csum_offl_spec);
-}
-
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr,
@@ -3293,7 +3255,7 @@ int dev_get_phys_port_id(struct net_device *dev,
int dev_get_phys_port_name(struct net_device *dev,
char *name, size_t len);
int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, int fd);
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
@@ -3302,6 +3264,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
const struct sk_buff *skb);
+static __always_inline int ____dev_forward_skb(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+ unlikely(!is_skb_forwardable(dev, skb))) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ skb_scrub_packet(skb, true);
+ skb->priority = 0;
+ return 0;
+}
+
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
extern int netdev_budget;
@@ -3487,6 +3464,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
txq->xmit_lock_owner = cpu;
}
+static inline bool __netif_tx_acquire(struct netdev_queue *txq)
+{
+ __acquire(&txq->_xmit_lock);
+ return true;
+}
+
+static inline void __netif_tx_release(struct netdev_queue *txq)
+{
+ __release(&txq->_xmit_lock);
+}
+
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
@@ -3588,17 +3576,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
#define HARD_TX_LOCK(dev, txq, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
__netif_tx_lock(txq, cpu); \
+ } else { \
+ __netif_tx_acquire(txq); \
} \
}
#define HARD_TX_TRYLOCK(dev, txq) \
(((dev->features & NETIF_F_LLTX) == 0) ? \
__netif_tx_trylock(txq) : \
- true )
+ __netif_tx_acquire(txq))
#define HARD_TX_UNLOCK(dev, txq) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
__netif_tx_unlock(txq); \
+ } else { \
+ __netif_tx_release(txq); \
} \
}
@@ -3817,12 +3809,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
updev; \
updev = netdev_upper_get_next_dev_rcu(dev, &(iter)))
-/* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
- for (iter = &(dev)->all_adj_list.upper, \
- updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
- updev; \
- updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *upper_dev,
+ void *data),
+ void *data);
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+ struct net_device *upper_dev);
void *netdev_lower_get_next_private(struct net_device *dev,
struct list_head **iter);
@@ -3855,17 +3848,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev,
struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
struct list_head **iter);
-#define netdev_for_each_all_lower_dev(dev, ldev, iter) \
- for (iter = (dev)->all_adj_list.lower.next, \
- ldev = netdev_all_lower_get_next(dev, &(iter)); \
- ldev; \
- ldev = netdev_all_lower_get_next(dev, &(iter)))
-
-#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \
- for (iter = (dev)->all_adj_list.lower.next, \
- ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \
- ldev; \
- ldev = netdev_all_lower_get_next_rcu(dev, &(iter)))
+int netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *lower_dev,
+ void *data),
+ void *data);
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *lower_dev,
+ void *data),
+ void *data);
void *netdev_adjacent_get_private(struct list_head *adj_list);
void *netdev_lower_get_first_private_rcu(struct net_device *dev);
@@ -3942,19 +3932,6 @@ static inline bool can_checksum_protocol(netdev_features_t features,
}
}
-/* Map an ethertype into IP protocol if possible */
-static inline int eproto_to_ipproto(int eproto)
-{
- switch (eproto) {
- case htons(ETH_P_IP):
- return IPPROTO_IP;
- case htons(ETH_P_IPV6):
- return IPPROTO_IPV6;
- default:
- return -1;
- }
-}
-
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev);
#else