diff options
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r-- | include/linux/netdevice.h | 297 |
1 files changed, 137 insertions, 160 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8d79d4ebcfe..994f7423a74b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include <uapi/linux/netdevice.h> #include <uapi/linux/if_bonding.h> #include <uapi/linux/pkt_cls.h> +#include <linux/hashtable.h> struct netpoll_info; struct device; @@ -191,6 +192,7 @@ struct net_device_stats { #ifdef CONFIG_RPS #include <linux/static_key.h> extern struct static_key rps_needed; +extern struct static_key rfs_needed; #endif struct neighbour; @@ -315,7 +317,6 @@ struct napi_struct { unsigned int gro_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL - spinlock_t poll_lock; int poll_owner; #endif struct net_device *dev; @@ -333,6 +334,16 @@ enum { NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ +}; + +enum { + NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), + NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -452,32 +463,22 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } -void __napi_complete(struct napi_struct *n); -void napi_complete_done(struct napi_struct *n, int work_done); +bool __napi_complete(struct napi_struct *n); +bool napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. + * Return false if device should avoid rearming interrupts. */ -static inline void napi_complete(struct napi_struct *n) +static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); } /** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - -/** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context * @@ -731,8 +732,8 @@ struct xps_dev_maps { struct rcu_head rcu; struct xps_map __rcu *cpu_map[0]; }; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) +#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 @@ -788,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -799,7 +801,9 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; + bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed @@ -923,6 +927,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * + * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. @@ -935,7 +947,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1030,7 +1043,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1154,6 +1167,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, @@ -1172,7 +1189,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); @@ -1262,7 +1280,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, @@ -1439,7 +1457,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1489,6 +1506,8 @@ enum netdev_priv_flags { * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value + * @min_mtu: Interface Minimum MTU value + * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @@ -1561,8 +1580,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1604,7 +1621,7 @@ enum netdev_priv_flags { * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one - * @prio_tc_map XXX: need comments on this one + * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * @@ -1658,11 +1675,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -1711,6 +1723,8 @@ struct net_device { unsigned char dma; unsigned int mtu; + unsigned int min_mtu; + unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; @@ -1784,7 +1798,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; @@ -1800,6 +1814,9 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif unsigned long tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; @@ -1810,9 +1827,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; @@ -1907,34 +1921,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } -static inline -void netdev_reset_tc(struct net_device *dev) -{ - dev->num_tc = 0; - memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); - memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); -} - -static inline -int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) -{ - if (tc >= dev->num_tc) - return -EINVAL; - - dev->tc_to_txq[tc].count = count; - dev->tc_to_txq[tc].offset = offset; - return 0; -} - -static inline -int netdev_set_num_tc(struct net_device *dev, u8 num_tc) -{ - if (num_tc > TC_MAX_QUEUE) - return -EINVAL; - - dev->num_tc = num_tc; - return 0; -} +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); +void netdev_reset_tc(struct net_device *dev); +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); +int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) @@ -2154,7 +2144,10 @@ struct napi_gro_cb { /* Used to determine if flush_id can be ignored */ u8 is_atomic:1; - /* 5 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 1 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2165,6 +2158,40 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **, + struct sk_buff *); +static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ @@ -2634,71 +2661,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } -struct skb_csum_offl_spec { - __u16 ipv4_okay:1, - ipv6_okay:1, - encap_okay:1, - ip_options_okay:1, - ext_hdrs_okay:1, - tcp_okay:1, - udp_okay:1, - sctp_okay:1, - vlan_okay:1, - no_encapped_ipv6:1, - no_not_encapped:1; -}; - -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help); - -static inline bool skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return false; - - return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); -} - -static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec) -{ - bool csum_encapped; - - return skb_csum_offload_chk(skb, spec, &csum_encapped, true); -} - -static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .ipv6_okay = 1, - .vlan_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - -static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - .vlan_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3293,7 +3255,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd); +int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -3302,6 +3264,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; @@ -3487,6 +3464,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) txq->xmit_lock_owner = cpu; } +static inline bool __netif_tx_acquire(struct netdev_queue *txq) +{ + __acquire(&txq->_xmit_lock); + return true; +} + +static inline void __netif_tx_release(struct netdev_queue *txq) +{ + __release(&txq->_xmit_lock); +} + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); @@ -3588,17 +3576,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) #define HARD_TX_LOCK(dev, txq, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_lock(txq, cpu); \ + } else { \ + __netif_tx_acquire(txq); \ } \ } #define HARD_TX_TRYLOCK(dev, txq) \ (((dev->features & NETIF_F_LLTX) == 0) ? \ __netif_tx_trylock(txq) : \ - true ) + __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ + } else { \ + __netif_tx_release(txq); \ } \ } @@ -3817,12 +3809,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); @@ -3855,17 +3848,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); @@ -3942,19 +3932,6 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } -/* Map an ethertype into IP protocol if possible */ -static inline int eproto_to_ipproto(int eproto) -{ - switch (eproto) { - case htons(ETH_P_IP): - return IPPROTO_IP; - case htons(ETH_P_IPV6): - return IPPROTO_IPV6; - default: - return -1; - } -} - #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else |