diff options
Diffstat (limited to 'include/net')
202 files changed, 6691 insertions, 3029 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 9c6ec78e47a5..13abe013af21 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -331,6 +331,9 @@ enum p9_qid_t { /* size of header for zero copy read/write */ #define P9_ZC_HDR_SZ 4096 +/* maximum length of an error string */ +#define P9_ERRMAX 128 + /** * struct p9_qid - file system entity information * @type: 8-bit type &p9_qid_t @@ -551,6 +554,4 @@ struct p9_fcall { int p9_errstr2errno(char *errstr, int len); int p9_error_init(void); -int p9_trans_fd_init(void); -void p9_trans_fd_exit(void); #endif /* NET_9P_H */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index ec1d1706f43c..78ebcf782ce5 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -11,6 +11,7 @@ #include <linux/utsname.h> #include <linux/idr.h> +#include <linux/tracepoint-defs.h> /* Number of requests per row */ #define P9_ROW_MAXTAG 255 @@ -76,7 +77,7 @@ enum p9_req_status_t { struct p9_req_t { int status; int t_err; - struct kref refcount; + refcount_t refcount; wait_queue_head_t wq; struct p9_fcall tc; struct p9_fcall rc; @@ -227,15 +228,55 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); static inline void p9_req_get(struct p9_req_t *r) { - kref_get(&r->refcount); + refcount_inc(&r->refcount); } static inline int p9_req_try_get(struct p9_req_t *r) { - return kref_get_unless_zero(&r->refcount); + return refcount_inc_not_zero(&r->refcount); } -int p9_req_put(struct p9_req_t *r); +int p9_req_put(struct p9_client *c, struct p9_req_t *r); + +/* We cannot have the real tracepoints in header files, + * use a wrapper function */ +DECLARE_TRACEPOINT(9p_fid_ref); +void do_trace_9p_fid_get(struct p9_fid *fid); +void do_trace_9p_fid_put(struct p9_fid *fid); + +/* fid reference counting helpers: + * - fids used for any length of time should always be referenced through + * p9_fid_get(), and released with p9_fid_put() + * - v9fs_fid_lookup() or similar will automatically call get for you + * 
and also require a put + * - the *_fid_add() helpers will stash the fid in the inode, + * at which point it is the responsibility of evict_inode() + * to call the put + * - the last put will automatically send a clunk to the server + */ +static inline struct p9_fid *p9_fid_get(struct p9_fid *fid) +{ + if (tracepoint_enabled(9p_fid_ref)) + do_trace_9p_fid_get(fid); + + refcount_inc(&fid->count); + + return fid; +} + +static inline int p9_fid_put(struct p9_fid *fid) +{ + if (!fid || IS_ERR(fid)) + return 0; + + if (tracepoint_enabled(9p_fid_ref)) + do_trace_9p_fid_put(fid); + + if (!refcount_dec_and_test(&fid->count)) + return 0; + + return p9_client_clunk(fid); +} void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 15a4e6a9dbf7..766ec07c9599 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -19,6 +19,10 @@ * @list: used to maintain a list of currently available transports * @name: the human-readable name of the transport * @maxsize: transport provided maximum packet size + * @pooled_rbuffers: currently only set for RDMA transport which pulls the + * response buffers from a shared pool, and accordingly + * we're less flexible when choosing the response message + * size in this case * @def: set if this transport should be considered the default * @create: member function to create a new connection on this transport * @close: member function to discard a connection on this transport @@ -38,6 +42,7 @@ struct p9_trans_module { struct list_head list; char *name; /* name of transport */ int maxsize; /* max message size of transport */ + bool pooled_rbuffers; int def; /* this transport should be default */ struct module *owner; int (*create)(struct p9_client *client, @@ -54,7 +59,7 @@ struct p9_trans_module { void v9fs_register_trans(struct p9_trans_module *m); void v9fs_unregister_trans(struct p9_trans_module *m); -struct p9_trans_module 
*v9fs_get_trans_by_name(char *s); +struct p9_trans_module *v9fs_get_trans_by_name(const char *s); struct p9_trans_module *v9fs_get_default_trans(void); void v9fs_put_trans(struct p9_trans_module *m); diff --git a/include/net/act_api.h b/include/net/act_api.h index b5b624c7e488..61f2ceb3939e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -7,6 +7,7 @@ */ #include <linux/refcount.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> @@ -44,6 +45,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -88,6 +90,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } +static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -99,6 +111,7 @@ struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; enum tca_id id; /* identifier should match kind */ + unsigned int net_id; size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, @@ -121,6 +134,9 @@ struct tc_action_ops { struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); + int (*offload_act_setup)(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack); }; struct tc_action_net { @@ -189,7 +205,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - u32 flags, struct 
netlink_ext_ack *extack); + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); @@ -240,6 +256,9 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_update_hw_stats(struct tc_action *action); +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); @@ -251,6 +270,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + +#else /* !CONFIG_NET_CLS_ACT */ + +static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) { + return 0; +} + #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78ea3e332688..c04f359655b8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) @@ -62,6 +64,8 @@ struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; + u8 ifa_proto; + const struct in6_addr *peer_pfx; u32 rt_priority; @@ -107,8 +111,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); -int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - u32 banned_flags); int 
ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, @@ -403,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index cee5f83c0f11..b69ca695935c 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -66,8 +66,6 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, - ktime_t *); bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, unsigned long); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..480fa579787e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -16,17 +16,15 @@ void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); struct sock *unix_peer_get(struct sock *sk); -#define UNIX_HASH_SIZE 256 +#define UNIX_HASH_MOD (256 - 1) +#define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; -extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; struct sockaddr_un name[]; }; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..568a87c5e0d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/workqueue.h> +#include <net/sock.h> 
#include <uapi/linux/vm_sockets.h> #include "vsock_addr.h" @@ -77,6 +78,7 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); +void vsock_data_ready(struct sock *sk); /**** TRANSPORT ****/ @@ -134,6 +136,7 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); + int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, @@ -205,7 +208,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/include/net/amt.h b/include/net/amt.h index 7a4db8b903ee..c881bc8b673b 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -7,6 +7,9 @@ #include <linux/siphash.h> #include <linux/jhash.h> +#include <linux/netdevice.h> +#include <net/gro_cells.h> +#include <net/rtnetlink.h> enum amt_msg_type { AMT_MSG_DISCOVERY = 1, @@ -15,7 +18,7 @@ enum amt_msg_type { AMT_MSG_MEMBERSHIP_QUERY, AMT_MSG_MEMBERSHIP_UPDATE, AMT_MSG_MULTICAST_DATA, - AMT_MSG_TEARDOWM, + AMT_MSG_TEARDOWN, __AMT_MSG_MAX, }; @@ -78,6 +81,15 @@ enum amt_status { #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1) +/* Gateway events only */ +enum amt_event { + AMT_EVENT_NONE, + AMT_EVENT_RECEIVE, + AMT_EVENT_SEND_DISCOVERY, + AMT_EVENT_SEND_REQUEST, + __AMT_EVENT_MAX, +}; + struct amt_header { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 type:4, @@ -292,6 +304,12 
@@ struct amt_group_node { struct hlist_head sources[]; }; +#define AMT_MAX_EVENTS 16 +struct amt_events { + enum amt_event event; + struct sk_buff *skb; +}; + struct amt_dev { struct net_device *dev; struct net_device *stream_dev; @@ -308,6 +326,7 @@ struct amt_dev { struct delayed_work req_wq; /* Protected by RTNL */ struct delayed_work secret_wq; + struct work_struct event_wq; /* AMT status */ enum amt_status status; /* Generated key */ @@ -345,6 +364,10 @@ struct amt_dev { /* Used only in gateway mode */ u64 mac:48, reserved:16; + /* AMT gateway side message handler queue */ + struct amt_events events[AMT_MAX_EVENTS]; + u8 event_idx; + u8 nr_events; }; #define AMT_TOS 0xc0 diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2b..d7ef4ec71dfe 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -71,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 03d409de61ad..f8cf3629a419 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -187,18 +187,12 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; char ip_mode; } ax25_route; -static inline void ax25_hold_route(ax25_route *ax25_rt) -{ - 
refcount_inc(&ax25_rt->refcount); -} - void __ax25_put_route(ax25_route *ax25_rt); extern rwlock_t ax25_route_lock; @@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void) read_unlock(&ax25_route_lock); } -static inline void ax25_put_route(ax25_route *ax25_rt) -{ - if (refcount_dec_and_test(&ax25_rt->refcount)) - __ax25_put_route(ax25_rt); -} - typedef struct { char slave; /* slave_mode? */ struct timer_list slave_timer; /* timeout timer */ @@ -229,13 +217,18 @@ struct ctl_table; typedef struct ax25_dev { struct ax25_dev *next; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net_device *forward; struct ctl_table_header *sysheader; int values[AX25_MAX_VALUES]; #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; + bool device_up; } ax25_dev; typedef struct ax25_cb { @@ -243,6 +236,7 @@ typedef struct ax25_cb { ax25_address source_addr, dest_addr; ax25_digi *digipeat; ax25_dev *ax25_dev; + netdevice_tracker dev_tracker; unsigned char iamdigi; unsigned char state, modulus, pidincl; unsigned short vs, vr, va; @@ -290,6 +284,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} + +static inline void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; diff --git a/include/net/ax88796.h b/include/net/ax88796.h index 2ed23a368602..303100f08ab8 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -8,6 +8,8 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +#include <linux/types.h> + struct sk_buff; struct net_device; struct platform_device; @@ -32,8 +34,8 @@ struct ax_plat_data { const unsigned char *buf, int star_page); void (*block_input)(struct net_device *dev, int count, struct sk_buff 
*skb, int ring_offset); - /* returns nonzero if a pending interrupt request might by caused by - * the ax88786. Handles all interrupts if set to NULL + /* returns nonzero if a pending interrupt request might be caused by + * the ax88796. Handles all interrupts if set to NULL */ int (*check_irq)(struct platform_device *pdev); }; diff --git a/include/net/bareudp.h b/include/net/bareudp.h index dc65a0d71d9b..17610c8d6361 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -3,21 +3,10 @@ #ifndef __NET_BAREUDP_H #define __NET_BAREUDP_H +#include <linux/netdevice.h> #include <linux/types.h> -#include <linux/skbuff.h> #include <net/rtnetlink.h> -struct bareudp_conf { - __be16 ethertype; - __be16 port; - u16 sport_min; - bool multi_proto_mode; -}; - -struct net_device *bareudp_dev_create(struct net *net, const char *name, - u8 name_assign_type, - struct bareudp_conf *info); - static inline bool netif_is_bareudp(const struct net_device *dev) { return dev->rtnl_link_ops && diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3271870fd85e..bcc5a4cd2c17 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -55,6 +55,8 @@ #define BTPROTO_CMTP 5 #define BTPROTO_HIDP 6 #define BTPROTO_AVDTP 7 +#define BTPROTO_ISO 8 +#define BTPROTO_LAST BTPROTO_ISO #define SOL_HCI 0 #define SOL_L2CAP 6 @@ -149,10 +151,51 @@ struct bt_voice { #define BT_MODE_LE_FLOWCTL 0x03 #define BT_MODE_EXT_FLOWCTL 0x04 -#define BT_PKT_STATUS 16 +#define BT_PKT_STATUS 16 #define BT_SCM_PKT_STATUS 0x03 +#define BT_ISO_QOS 17 + +#define BT_ISO_QOS_CIG_UNSET 0xff +#define BT_ISO_QOS_CIS_UNSET 0xff + +#define BT_ISO_QOS_BIG_UNSET 0xff +#define BT_ISO_QOS_BIS_UNSET 0xff + +struct bt_iso_io_qos { + __u32 interval; + __u16 latency; + __u16 sdu; + __u8 phy; + __u8 rtn; +}; + +struct bt_iso_qos { + union { + __u8 cig; + __u8 big; + }; + union { + __u8 cis; + __u8 bis; + }; + union { + __u8 sca; + __u8 sync_interval; + }; + __u8 
packing; + __u8 framing; + struct bt_iso_io_qos in; + struct bt_iso_io_qos out; +}; + +#define BT_ISO_PHY_1M 0x01 +#define BT_ISO_PHY_2M 0x02 +#define BT_ISO_PHY_CODED 0x04 +#define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \ + BT_ISO_PHY_CODED) + #define BT_CODEC 19 struct bt_codec_caps { @@ -177,6 +220,8 @@ struct bt_codecs { #define BT_CODEC_TRANSPARENT 0x03 #define BT_CODEC_MSBC 0x05 +#define BT_ISO_BASE 20 + __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) @@ -204,19 +249,21 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif +#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + #define bt_dev_info(hdev, fmt, ...) \ - BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ - BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ - BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) \ - BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ - bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) 
\ - bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { @@ -343,7 +390,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); -int bt_sock_wait_ready(struct sock *sk, unsigned long flags); +int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); @@ -380,6 +427,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { + struct sock *sk; u16 opcode; u8 req_flags; u8 req_event; @@ -389,6 +437,11 @@ struct hci_ctrl { }; }; +struct mgmt_ctrl { + struct hci_dev *hdev; + u16 opcode; +}; + struct bt_skb_cb { u8 pkt_type; u8 force_active; @@ -398,6 +451,7 @@ struct bt_skb_cb { struct l2cap_ctrl l2cap; struct sco_ctrl sco; struct hci_ctrl hci; + struct mgmt_ctrl mgmt; }; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) @@ -405,6 +459,8 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode +#define hci_skb_event(skb) bt_cb((skb))->hci.req_event +#define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { @@ -483,7 +539,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, struct sk_buff *skb, **frag; skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) return skb; len -= skb->len; @@ -497,8 +553,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, tmp = 
bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(tmp)) { - kfree_skb(skb); - return tmp; + return skb; } len -= tmp->len; @@ -511,6 +566,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, } int bt_to_errno(u16 code); +__u8 bt_status(int err); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); @@ -548,8 +604,30 @@ static inline void sco_exit(void) } #endif +#if IS_ENABLED(CONFIG_BT_LE) +int iso_init(void); +int iso_exit(void); +bool iso_enabled(void); +#else +static inline int iso_init(void) +{ + return 0; +} + +static inline int iso_exit(void) +{ + return 0; +} + +static inline bool iso_enabled(void) +{ + return false; +} +#endif + int mgmt_init(void); void mgmt_exit(void); +void mgmt_cleanup(struct sock *sk); void bt_sock_reclassify_lock(struct sock *sk, int proto); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 63065bc01b76..e004ba04a9ae 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -228,17 +228,6 @@ enum { */ HCI_QUIRK_VALID_LE_STATES, - /* When this quirk is set, then erroneous data reporting - * is ignored. This is mainly due to the fact that the HCI - * Read Default Erroneous Data Reporting command is advertised, - * but not supported; these controllers often reply with unknown - * command and tend to lock up randomly. Needing a hard reset. - * - * This quirk can be set before hci_register_dev is called or - * during the hdev->setup vendor callback. - */ - HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, - /* * When this quirk is set, then the hci_suspend_notifier is not * registered. This is intended for devices which drop completely @@ -246,6 +235,34 @@ enum { * HCI after resume. */ HCI_QUIRK_NO_SUSPEND_NOTIFIER, + + /* + * When this quirk is set, LE tx power is not queried on startup + * and the min/max tx power values default to HCI_TX_POWER_INVALID. 
+ * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, + + /* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with + * HCI_FLT_CLEAR_ALL are ignored and event filtering is + * completely avoided. A subset of the CSR controller + * clones struggle with this and instantly lock up. + * + * Note that devices using this must (separately) disable + * runtime suspend, because event filtering takes place there. + */ + HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, + + /* + * When this quirk is set, disables the use of + * HCI_OP_ENHANCED_SETUP_SYNC_CONN command to setup SCO connections. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, }; /* HCI device flags */ @@ -299,6 +316,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, HCI_SC_ENABLED, @@ -321,6 +339,7 @@ enum { HCI_LE_SCAN_INTERRUPTED, HCI_WIDEBAND_SPEECH_ENABLED, HCI_EVENT_FILTER_CONFIGURED, + HCI_PA_SYNC, HCI_DUT_MODE, HCI_VENDOR_DIAG, @@ -332,6 +351,12 @@ enum { HCI_FORCE_NO_MITM, HCI_QUALITY_REPORT, HCI_OFFLOAD_CODECS_ENABLED, + HCI_LE_SIMULTANEOUS_ROLES, + HCI_CMD_DRAIN_WORKQUEUE, + + HCI_MESH_EXPERIMENTAL, + HCI_MESH, + HCI_MESH_SENDING, __HCI_NUM_FLAGS, }; @@ -467,6 +492,7 @@ enum { #define LMP_EXT_INQ 0x01 #define LMP_SIMUL_LE_BR 0x02 #define LMP_SIMPLE_PAIR 0x08 +#define LMP_ERR_DATA_REPORTING 0x20 #define LMP_NO_FLUSH 0x40 #define LMP_LSTO 0x01 @@ -499,9 +525,11 @@ enum { #define HCI_LE_PHY_2M 0x01 #define HCI_LE_PHY_CODED 0x08 #define HCI_LE_EXT_ADV 0x10 +#define HCI_LE_PERIODIC_ADV 0x20 #define HCI_LE_CHAN_SEL_ALG2 0x40 #define HCI_LE_CIS_CENTRAL 0x10 #define HCI_LE_CIS_PERIPHERAL 0x20 +#define HCI_LE_ISO_BROADCASTER 0x40 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -558,6 +586,7 @@ enum { #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 
#define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f +#define HCI_ERROR_INVALID_PARAMETERS 0x12 #define HCI_ERROR_REMOTE_USER_TERM 0x13 #define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14 #define HCI_ERROR_REMOTE_POWER_OFF 0x15 @@ -566,6 +595,7 @@ enum { #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c +#define HCI_ERROR_CANCELLED_BY_HOST 0x44 /* Flow control modes */ #define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00 @@ -594,6 +624,7 @@ enum { #define EIR_SSP_RAND_R192 0x0F /* Simple Pairing Randomizer R-192 */ #define EIR_DEVICE_ID 0x10 /* device ID */ #define EIR_APPEARANCE 0x19 /* Device appearance */ +#define EIR_SERVICE_DATA 0x16 /* Service Data */ #define EIR_LE_BDADDR 0x1B /* LE Bluetooth device address */ #define EIR_LE_ROLE 0x1C /* LE role */ #define EIR_SSP_HASH_C256 0x1D /* Simple Pairing Hash C-256 */ @@ -1046,8 +1077,8 @@ struct hci_cp_read_stored_link_key { } __packed; struct hci_rp_read_stored_link_key { __u8 status; - __u8 max_keys; - __u8 num_keys; + __le16 max_keys; + __le16 num_keys; } __packed; #define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12 @@ -1057,7 +1088,7 @@ struct hci_cp_delete_stored_link_key { } __packed; struct hci_rp_delete_stored_link_key { __u8 status; - __u8 num_keys; + __le16 num_keys; } __packed; #define HCI_MAX_NAME_LENGTH 248 @@ -1842,6 +1873,22 @@ struct hci_cp_le_ext_conn_param { __le16 max_ce_len; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC 0x2044 +struct hci_cp_le_pa_create_sync { + __u8 options; + __u8 sid; + __u8 addr_type; + bdaddr_t addr; + __le16 skip; + __le16 sync_timeout; + __u8 sync_cte_type; +} __packed; + +#define HCI_OP_LE_PA_TERM_SYNC 0x2046 +struct hci_cp_le_pa_term_sync { + __le16 handle; +} __packed; + #define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS 0x203b struct hci_rp_le_read_num_supported_adv_sets { __u8 status; @@ -1876,13 +1923,6 @@ struct hci_rp_le_set_ext_adv_params { __u8 tx_power; } __packed; -#define 
HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039 -struct hci_cp_le_set_ext_adv_enable { - __u8 enable; - __u8 num_of_sets; - __u8 data[]; -} __packed; - struct hci_cp_ext_adv_set { __u8 handle; __le16 duration; @@ -1909,6 +1949,37 @@ struct hci_cp_le_set_ext_scan_rsp_data { __u8 data[]; } __packed; +#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039 +struct hci_cp_le_set_ext_adv_enable { + __u8 enable; + __u8 num_of_sets; + __u8 data[]; +} __packed; + +#define HCI_OP_LE_SET_PER_ADV_PARAMS 0x203e +struct hci_cp_le_set_per_adv_params { + __u8 handle; + __le16 min_interval; + __le16 max_interval; + __le16 periodic_properties; +} __packed; + +#define HCI_MAX_PER_AD_LENGTH 252 + +#define HCI_OP_LE_SET_PER_ADV_DATA 0x203f +struct hci_cp_le_set_per_adv_data { + __u8 handle; + __u8 operation; + __u8 length; + __u8 data[]; +} __packed; + +#define HCI_OP_LE_SET_PER_ADV_ENABLE 0x2040 +struct hci_cp_le_set_per_adv_enable { + __u8 enable; + __u8 handle; +} __packed; + #define LE_SET_ADV_DATA_OP_COMPLETE 0x03 #define LE_SET_ADV_DATA_NO_FRAG 0x01 @@ -1930,6 +2001,16 @@ struct hci_rp_le_read_transmit_power { __s8 max_le_tx_power; } __packed; +#define HCI_NETWORK_PRIVACY 0x00 +#define HCI_DEVICE_PRIVACY 0x01 + +#define HCI_OP_LE_SET_PRIVACY_MODE 0x204e +struct hci_cp_le_set_privacy_mode { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 mode; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; @@ -1956,7 +2037,7 @@ struct hci_rp_le_read_iso_tx_sync { struct hci_cis_params { __u8 cis_id; __le16 c_sdu; - __le16 p_pdu; + __le16 p_sdu; __u8 c_phy; __u8 p_phy; __u8 c_rtn; @@ -1967,7 +2048,7 @@ struct hci_cp_le_set_cig_params { __u8 cig_id; __u8 c_interval[3]; __u8 p_interval[3]; - __u8 wc_sca; + __u8 sca; __u8 packing; __u8 framing; __le16 c_latency; @@ -2010,7 +2091,78 @@ struct hci_cp_le_reject_cis { __u8 reason; } __packed; +#define HCI_OP_LE_CREATE_BIG 0x2068 +struct hci_bis { + __u8 sdu_interval[3]; + __le16 sdu; + __le16 latency; + __u8 rtn; + 
__u8 phy; + __u8 packing; + __u8 framing; + __u8 encryption; + __u8 bcode[16]; +} __packed; + +struct hci_cp_le_create_big { + __u8 handle; + __u8 adv_handle; + __u8 num_bis; + struct hci_bis bis; +} __packed; + +#define HCI_OP_LE_TERM_BIG 0x206a +struct hci_cp_le_term_big { + __u8 handle; + __u8 reason; +} __packed; + +#define HCI_OP_LE_BIG_CREATE_SYNC 0x206b +struct hci_cp_le_big_create_sync { + __u8 handle; + __le16 sync_handle; + __u8 encryption; + __u8 bcode[16]; + __u8 mse; + __le16 timeout; + __u8 num_bis; + __u8 bis[0]; +} __packed; + +#define HCI_OP_LE_BIG_TERM_SYNC 0x206c +struct hci_cp_le_big_term_sync { + __u8 handle; +} __packed; + +#define HCI_OP_LE_SETUP_ISO_PATH 0x206e +struct hci_cp_le_setup_iso_path { + __le16 handle; + __u8 direction; + __u8 path; + __u8 codec; + __le16 codec_cid; + __le16 codec_vid; + __u8 delay[3]; + __u8 codec_cfg_len; + __u8 codec_cfg[0]; +} __packed; + +struct hci_rp_le_setup_iso_path { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_SET_HOST_FEATURE 0x2074 +struct hci_cp_le_set_host_feature { + __u8 bit_number; + __u8 bit_value; +} __packed; + /* ---- HCI Events ---- */ +struct hci_ev_status { + __u8 status; +} __packed; + #define HCI_EV_INQUIRY_COMPLETE 0x01 #define HCI_EV_INQUIRY_RESULT 0x02 @@ -2023,6 +2175,11 @@ struct inquiry_info { __le16 clock_offset; } __packed; +struct hci_ev_inquiry_result { + __u8 num; + struct inquiry_info info[]; +}; + #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { __u8 status; @@ -2134,7 +2291,7 @@ struct hci_comp_pkts_info { } __packed; struct hci_ev_num_comp_pkts { - __u8 num_hndl; + __u8 num; struct hci_comp_pkts_info handles[]; } __packed; @@ -2184,7 +2341,7 @@ struct hci_ev_pscan_rep_mode { } __packed; #define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22 -struct inquiry_info_with_rssi { +struct inquiry_info_rssi { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2192,7 +2349,7 @@ struct inquiry_info_with_rssi { __le16 clock_offset; __s8 rssi; } 
__packed; -struct inquiry_info_with_rssi_and_pscan_mode { +struct inquiry_info_rssi_pscan { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2201,6 +2358,10 @@ struct inquiry_info_with_rssi_and_pscan_mode { __le16 clock_offset; __s8 rssi; } __packed; +struct hci_ev_inquiry_result_rssi { + __u8 num; + __u8 data[]; +} __packed; #define HCI_EV_REMOTE_EXT_FEATURES 0x23 struct hci_ev_remote_ext_features { @@ -2255,6 +2416,11 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +struct hci_ev_ext_inquiry_result { + __u8 num; + struct extended_inquiry_info info[]; +} __packed; + #define HCI_EV_KEY_REFRESH_COMPLETE 0x30 struct hci_ev_key_refresh_complete { __u8 status; @@ -2422,13 +2588,18 @@ struct hci_ev_le_conn_complete { #define HCI_EV_LE_ADVERTISING_REPORT 0x02 struct hci_ev_le_advertising_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; __u8 data[]; } __packed; +struct hci_ev_le_advertising_report { + __u8 num; + struct hci_ev_le_advertising_info info[]; +} __packed; + #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 struct hci_ev_le_conn_update_complete { __u8 status; @@ -2472,7 +2643,7 @@ struct hci_ev_le_data_len_change { #define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B struct hci_ev_le_direct_adv_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 direct_addr_type; @@ -2480,6 +2651,11 @@ struct hci_ev_le_direct_adv_info { __s8 rssi; } __packed; +struct hci_ev_le_direct_adv_report { + __u8 num; + struct hci_ev_le_direct_adv_info info[]; +} __packed; + #define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c struct hci_ev_le_phy_update_complete { __u8 status; @@ -2489,8 +2665,8 @@ struct hci_ev_le_phy_update_complete { } __packed; #define HCI_EV_LE_EXT_ADV_REPORT 0x0d -struct hci_ev_le_ext_adv_report { - __le16 evt_type; +struct hci_ev_le_ext_adv_info { + __le16 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 primary_phy; @@ -2498,11 +2674,28 @@ struct hci_ev_le_ext_adv_report { __u8 sid; __u8 tx_power; __s8 
rssi; - __le16 interval; - __u8 direct_addr_type; + __le16 interval; + __u8 direct_addr_type; bdaddr_t direct_addr; - __u8 length; - __u8 data[]; + __u8 length; + __u8 data[]; +} __packed; + +struct hci_ev_le_ext_adv_report { + __u8 num; + struct hci_ev_le_ext_adv_info info[]; +} __packed; + +#define HCI_EV_LE_PA_SYNC_ESTABLISHED 0x0e +struct hci_ev_le_pa_sync_established { + __u8 status; + __le16 handle; + __u8 sid; + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 phy; + __le16 interval; + __u8 clock_accuracy; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a @@ -2556,6 +2749,55 @@ struct hci_evt_le_cis_req { __u8 cis_id; } __packed; +#define HCI_EVT_LE_CREATE_BIG_COMPLETE 0x1b +struct hci_evt_le_create_big_complete { + __u8 status; + __u8 handle; + __u8 sync_delay[3]; + __u8 transport_delay[3]; + __u8 phy; + __u8 nse; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __le16 interval; + __u8 num_bis; + __le16 bis_handle[]; +} __packed; + +#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +struct hci_evt_le_big_sync_estabilished { + __u8 status; + __u8 handle; + __u8 latency[3]; + __u8 nse; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __le16 interval; + __u8 num_bis; + __le16 bis[]; +} __packed; + +#define HCI_EVT_LE_BIG_INFO_ADV_REPORT 0x22 +struct hci_evt_le_big_info_adv_report { + __le16 sync_handle; + __u8 num_bis; + __u8 nse; + __le16 iso_interval; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __u8 sdu_interval[3]; + __le16 max_sdu; + __u8 phy; + __u8 framing; + __u8 encryption; +} __packed; + #define HCI_EV_VENDOR 0xff /* Internal events generated by Bluetooth stack */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dd8840e70e25..c54bc71254af 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -30,11 +30,15 @@ #include <linux/rculist.h> #include <net/bluetooth/hci.h> +#include <net/bluetooth/hci_sync.h> #include <net/bluetooth/hci_sock.h> /* HCI priority */ 
#define HCI_PRIO_MAX 7 +/* HCI maximum id value */ +#define HCI_MAX_ID 10000 + /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; @@ -87,6 +91,7 @@ struct discovery_state { u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; + unsigned long name_resolve_timeout; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -121,6 +126,7 @@ struct hci_conn_hash { unsigned int acl_num; unsigned int amp_num; unsigned int sco_num; + unsigned int iso_num; unsigned int le_num; unsigned int le_num_peripheral; }; @@ -150,23 +156,20 @@ struct bdaddr_list_with_irk { u8 local_irk[16]; }; +/* Bitmask of connection flags */ +enum hci_conn_flags { + HCI_CONN_FLAG_REMOTE_WAKEUP = 1, + HCI_CONN_FLAG_DEVICE_PRIVACY = 2, +}; +typedef u8 hci_conn_flags_t; + struct bdaddr_list_with_flags { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; - u32 current_flags; + hci_conn_flags_t flags; }; -enum hci_conn_flags { - HCI_CONN_FLAG_REMOTE_WAKEUP, - HCI_CONN_FLAG_MAX -}; - -#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) - -/* Make sure number of flags doesn't exceed sizeof(current_flags) */ -static_assert(HCI_CONN_FLAG_MAX < 32); - struct bt_uuid { struct list_head list; u8 uuid[16]; @@ -232,8 +235,10 @@ struct oob_data { struct adv_info { struct list_head list; - bool enabled; - bool pending; + bool enabled; + bool pending; + bool periodic; + __u8 mesh; __u8 instance; __u32 flags; __u16 timeout; @@ -241,8 +246,12 @@ struct adv_info { __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; + bool adv_data_changed; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; + bool scan_rsp_changed; + __u16 per_adv_data_len; + __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; __s8 tx_power; __u32 min_interval; __u32 max_interval; @@ -256,6 +265,24 @@ struct adv_info { #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F +#define DATA_CMP(_d1, _l1, _d2, _l2) \ + (_l1 == _l2 ? 
memcmp(_d1, _d2, _l1) : _l1 - _l2) + +#define ADV_DATA_CMP(_adv, _data, _len) \ + DATA_CMP((_adv)->adv_data, (_adv)->adv_data_len, _data, _len) + +#define SCAN_RSP_CMP(_adv, _data, _len) \ + DATA_CMP((_adv)->scan_rsp_data, (_adv)->scan_rsp_len, _data, _len) + +struct monitored_device { + struct list_head list; + + bdaddr_t bdaddr; + __u8 addr_type; + __u16 handle; + bool notified; +}; + struct adv_pattern { struct list_head list; __u8 ad_type; @@ -292,6 +319,9 @@ struct adv_monitor { #define HCI_MAX_SHORT_NAME_LENGTH 10 +#define HCI_CONN_HANDLE_UNSET 0xffff +#define HCI_CONN_HANDLE_MAX 0x0eff + /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 @@ -343,6 +373,8 @@ struct hci_dev { __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; + __u8 mesh_ad_types[16]; + __u8 mesh_send_ref; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; @@ -351,8 +383,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; - __u8 stored_max_keys; - __u8 stored_num_keys; + __u16 stored_max_keys; + __u16 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; @@ -449,13 +481,16 @@ struct hci_dev { unsigned int acl_cnt; unsigned int sco_cnt; unsigned int le_cnt; + unsigned int iso_cnt; unsigned int acl_mtu; unsigned int sco_mtu; unsigned int le_mtu; + unsigned int iso_mtu; unsigned int acl_pkts; unsigned int sco_pkts; unsigned int le_pkts; + unsigned int iso_pkts; __u16 block_len; __u16 block_mtu; @@ -475,6 +510,11 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; + struct work_struct cmd_sync_work; + struct list_head cmd_sync_work_list; + struct mutex cmd_sync_work_lock; + struct work_struct cmd_sync_cancel_work; + struct work_struct reenable_adv_work; __u16 discov_timeout; struct delayed_work discov_off; @@ -488,11 +528,6 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; - struct work_struct discov_update; - struct 
work_struct bg_scan_update; - struct work_struct scan_update; - struct work_struct connectable_update; - struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -501,6 +536,7 @@ struct hci_dev { struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; + struct sk_buff *recv_event; struct mutex req_lock; wait_queue_head_t req_wait_q; @@ -519,7 +555,6 @@ struct hci_dev { bool advertising_paused; struct notifier_block suspend_notifier; - struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; @@ -528,11 +563,9 @@ struct hci_dev { bdaddr_t wake_addr; u8 wake_addr_type; - wait_queue_head_t suspend_wait_q; - DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); - struct hci_conn_hash conn_hash; + struct list_head mesh_pending; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -562,12 +595,15 @@ struct hci_dev { struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); + hci_conn_flags_t conn_flags; __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 adv_data_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; __u8 scan_rsp_data_len; + __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; + __u8 per_adv_data_len; struct list_head adv_instances; unsigned int adv_instance_cnt; @@ -583,6 +619,8 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; + struct delayed_work mesh_send_done; + enum { INTERLEAVE_SCAN_NONE, INTERLEAVE_SCAN_NO_FILTER, @@ -591,6 +629,9 @@ struct hci_dev { struct delayed_work interleave_scan; + struct list_head monitored_devices; + bool advmon_pend_notify; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif @@ -603,6 +644,7 @@ struct hci_dev { #if IS_ENABLED(CONFIG_BT_AOSPEXT) bool aosp_capable; + bool aosp_quality_report; #endif int (*open)(struct hci_dev *hdev); @@ -631,6 +673,7 @@ enum conn_reasons { CONN_REASON_PAIR_DEVICE, 
CONN_REASON_L2CAP_CHAN, CONN_REASON_SCO_CONNECT, + CONN_REASON_ISO_CONNECT, }; struct hci_conn { @@ -648,6 +691,7 @@ struct hci_conn { __u8 resp_addr_type; __u8 adv_instance; __u16 handle; + __u16 sync_handle; __u16 state; __u8 mode; __u8 type; @@ -678,11 +722,14 @@ struct hci_conn { __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; + __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; + __u8 le_per_adv_data_len; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; __s8 tx_power; __s8 max_tx_power; + struct bt_iso_qos iso_qos; unsigned long flags; enum conn_reasons conn_reason; @@ -713,6 +760,7 @@ struct hci_conn { struct hci_dev *hdev; void *l2cap_data; void *sco_data; + void *iso_data; struct amp_mgr *amp_mgr; struct hci_conn *link; @@ -721,6 +769,8 @@ struct hci_conn { void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason); + + void (*cleanup)(struct hci_conn *conn); }; struct hci_chan { @@ -756,7 +806,8 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - u32 current_flags; + hci_conn_flags_t flags; + u8 privacy_mode; }; extern struct list_head hci_dev_list; @@ -781,6 +832,12 @@ extern struct mutex hci_cb_list_lock; hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ } while (0) +#define hci_dev_le_state_simultaneous(hdev) \ + (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (hdev->le_states[4] & 0x08) && /* Central */ \ + (hdev->le_states[4] & 0x40) && /* Peripheral */ \ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ + /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); @@ -801,6 +858,21 @@ static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_BT_LE) +int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); +void 
iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +#else +static inline int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 *flags) +{ + return 0; +} +static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, + u16 flags) +{ +} +#endif + /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ #define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ @@ -885,6 +957,7 @@ enum { HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, HCI_CONN_AUTH_FAILURE, + HCI_CONN_PER_ADV, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) @@ -921,6 +994,9 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num++; break; + case ISO_LINK: + h->iso_num++; + break; } } @@ -947,6 +1023,9 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num--; break; + case ISO_LINK: + h->iso_num--; + break; } } @@ -963,6 +1042,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case SCO_LINK: case ESCO_LINK: return h->sco_num; + case ISO_LINK: + return h->iso_num; default: return 0; } @@ -972,7 +1053,7 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->amp_num + c->sco_num + c->le_num; + return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; } static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) @@ -995,6 +1076,29 @@ static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) return type; } +static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, + bdaddr_t *ba, + __u8 big, __u8 bis) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (bacmp(&c->dst, ba) || c->type != ISO_LINK) + continue; + + if (c->iso_qos.big == big && c->iso_qos.bis == bis) { 
+ rcu_read_unlock(); + return c; + } + } + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev, __u16 handle) { @@ -1058,6 +1162,76 @@ static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, + bdaddr_t *ba, + __u8 ba_type) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK) + continue; + + if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, + __u8 handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK) + continue; + + if (handle == c->iso_qos.cig) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, + __u8 handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK) + continue; + + if (handle == c->iso_qos.big) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { @@ -1078,6 +1252,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } +typedef void (*hci_conn_func_t)(struct hci_conn *conn, void *data); +static inline void hci_conn_hash_list_state(struct hci_dev *hdev, + hci_conn_func_t func, __u8 type, + __u16 state, void *data) +{ + struct hci_conn_hash 
*h = &hdev->conn_hash; + struct hci_conn *c; + + if (!func) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && c->state == state) + func(c, data); + } + + rcu_read_unlock(); +} + static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1101,6 +1296,8 @@ static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); +bool hci_iso_setup_path(struct hci_conn *conn); +int hci_le_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); @@ -1119,13 +1316,23 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role, - bdaddr_t *direct_rpa); + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting, struct bt_codec *codec); +struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos); +struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos); +struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos, + __u8 data_len, __u8 *data); +int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, + __u8 sid); +int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos, + __u16 sync_handle, __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct 
hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1134,7 +1341,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); -void hci_le_conn_failed(struct hci_conn *conn, u8 status); +void hci_conn_failed(struct hci_conn *conn, u8 status); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an @@ -1264,6 +1471,8 @@ void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); void hci_release_dev(struct hci_dev *hdev); +int hci_register_suspend_notifier(struct hci_dev *hdev); +int hci_unregister_suspend_notifier(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); @@ -1370,11 +1579,15 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); -int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, - u16 adv_data_len, u8 *adv_data, - u16 scan_rsp_len, u8 *scan_rsp_data, - u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval); +struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration, s8 tx_power, + u32 min_interval, u32 max_interval, + u8 mesh_handle); +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u8 data_len, u8 *data, + u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, 
u8 *scan_rsp_data); @@ -1385,12 +1598,9 @@ bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); -int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); -int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); -bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - int *err); -bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err); -bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err); +int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); +int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle); +int hci_remove_all_adv_monitor(struct hci_dev *hdev); bool hci_is_adv_monitoring(struct hci_dev *hdev); int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev); @@ -1461,11 +1671,21 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) +#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) + /* Use LL Privacy based address resolution if supported */ -#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ + hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) -/* Use enhanced synchronous connection if command is supported */ -#define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) +#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ + (hdev->commands[39] & 0x04)) + +/* Use enhanced synchronous connection if command is supported and its quirk + * has not been set. 
+ */ +#define enhanced_sync_conn_capable(dev) \ + (((dev)->commands[29] & 0x08) && \ + !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ @@ -1476,6 +1696,27 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) +/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: + * + * C24: Mandatory if the LE Controller supports Connection State and either + * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported + */ +#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ + ext_adv_capable(dev)) + +/* Periodic advertising support */ +#define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) + +/* CIS Master/Slave and BIS support */ +#define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) +#define cis_capable(dev) \ + (cis_central_capable(dev) || cis_peripheral_capable(dev)) +#define cis_central_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) +#define cis_peripheral_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) +#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) + /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 @@ -1490,6 +1731,9 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); + case ISO_LINK: + return iso_connect_ind(hdev, bdaddr, flags); + default: BT_ERR("unknown link type %d", type); return -EINVAL; @@ -1690,10 +1934,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 
timeout); -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); @@ -1701,11 +1941,10 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); +void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); - -struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); +void *hci_recv_event_data(struct hci_dev *hdev, __u8 event); u32 hci_conn_get_phy(struct hci_conn *conn); @@ -1764,6 +2003,13 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ +#define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */ +#define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */ +#define DISCOV_LE_ADV_MESH_MIN 0x00A0 /* 100 msec */ +#define DISCOV_LE_ADV_MESH_MAX 0x00A0 /* 100 msec */ +#define INTERVAL_TO_MS(x) (((x) * 10) / 0x10) + +#define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); @@ -1806,7 +2052,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); -void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); @@ -1814,7 +2059,8 @@ void 
mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, - u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); @@ -1831,8 +2077,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); -void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); -void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); @@ -1840,9 +2084,10 @@ void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); -int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); -int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, + bdaddr_t *bdaddr, u8 addr_type); +int hci_abort_conn(struct hci_conn *conn, u8 reason); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h new file mode 100644 index 000000000000..17f5a4c32f36 --- /dev/null +++ b/include/net/bluetooth/hci_sync.h 
@@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); +typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, + int err); + +struct hci_cmd_sync_work_entry { + struct list_head list; + hci_cmd_sync_work_func_t func; + void *data; + hci_cmd_sync_work_destroy_t destroy; +}; + +struct adv_info; +/* Function with sync suffix shall not be called with hdev->lock held as they + * wait the command to complete and in the meantime an event could be received + * which could attempt to acquire hdev->lock causing a deadlock. + */ +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout); +struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); +int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); + +void hci_cmd_sync_init(struct hci_dev *hdev); +void hci_cmd_sync_clear(struct hci_dev *hdev); +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); + +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev 
*hdev); +int hci_update_name_sync(struct hci_dev *hdev); +int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); + +int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, + bool use_rpa, struct adv_info *adv_instance, + u8 *own_addr_type, bdaddr_t *rand_addr); + +int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, + bool rpa, u8 *own_addr_type); + +int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data(struct hci_dev *hdev, u8 instance); +int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, + bool force); + +int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance); +int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_advertising_sync(struct hci_dev *hdev); +int hci_enable_advertising(struct hci_dev *hdev); + +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, + u8 *data, u32 flags, u16 min_interval, + u16 max_interval, u16 sync_interval); + +int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); +int hci_disable_advertising_sync(struct hci_dev *hdev); +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); +int hci_update_passive_scan_sync(struct hci_dev *hdev); +int hci_update_passive_scan(struct hci_dev *hdev); +int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); +int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); +int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val); +int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); + +int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); +int hci_update_scan_sync(struct hci_dev *hdev); +int hci_update_scan(struct hci_dev *hdev); + +int 
hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); +int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, + struct sock *sk); +int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance); +struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, + struct sock *sk); + +int hci_reset_sync(struct hci_dev *hdev); +int hci_dev_open_sync(struct hci_dev *hdev); +int hci_dev_close_sync(struct hci_dev *hdev); + +int hci_powered_update_sync(struct hci_dev *hdev); +int hci_set_powered_sync(struct hci_dev *hdev, u8 val); + +int hci_update_discoverable_sync(struct hci_dev *hdev); +int hci_update_discoverable(struct hci_dev *hdev); + +int hci_update_connectable_sync(struct hci_dev *hdev); + +int hci_start_discovery_sync(struct hci_dev *hdev); +int hci_stop_discovery_sync(struct hci_dev *hdev); + +int hci_suspend_sync(struct hci_dev *hdev); +int hci_resume_sync(struct hci_dev *hdev); + +struct hci_conn; + +int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); + +int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle); + +int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason); + +int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); + +int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); diff --git a/include/net/bluetooth/iso.h b/include/net/bluetooth/iso.h new file mode 100644 index 000000000000..3f4fe8b78e1b --- /dev/null +++ b/include/net/bluetooth/iso.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2022 Intel Corporation + */ + +#ifndef __ISO_H +#define __ISO_H + +/* ISO defaults */ +#define ISO_DEFAULT_MTU 251 +#define ISO_MAX_NUM_BIS 0x1f + +/* ISO socket broadcast address */ +struct sockaddr_iso_bc { + bdaddr_t bc_bdaddr; + __u8 bc_bdaddr_type; + __u8 bc_sid; + __u8 bc_num_bis; + 
__u8 bc_bis[ISO_MAX_NUM_BIS]; +}; + +/* ISO socket address */ +struct sockaddr_iso { + sa_family_t iso_family; + bdaddr_t iso_bdaddr; + __u8 iso_bdaddr_type; + struct sockaddr_iso_bc iso_bc[]; +}; + +#endif /* __ISO_H */ diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3c4f550e5a8b..2f766e3437ce 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -847,6 +847,7 @@ enum { }; void l2cap_chan_hold(struct l2cap_chan *c); +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23a0524061b7..743f6f59dff8 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -696,7 +696,7 @@ struct mgmt_cp_set_blocked_keys { #define MGMT_READ_CONTROLLER_CAP_SIZE 0 struct mgmt_rp_read_controller_cap { __le16 cap_len; - __u8 cap[0]; + __u8 cap[]; } __packed; #define MGMT_OP_READ_EXP_FEATURES_INFO 0x0049 @@ -837,6 +837,42 @@ struct mgmt_cp_add_adv_patterns_monitor_rssi { struct mgmt_adv_pattern patterns[]; } __packed; #define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8 +#define MGMT_OP_SET_MESH_RECEIVER 0x0057 +struct mgmt_cp_set_mesh { + __u8 enable; + __le16 window; + __le16 period; + __u8 num_ad_types; + __u8 ad_types[]; +} __packed; +#define MGMT_SET_MESH_RECEIVER_SIZE 6 + +#define MGMT_OP_MESH_READ_FEATURES 0x0058 +#define MGMT_MESH_READ_FEATURES_SIZE 0 +#define MESH_HANDLES_MAX 3 +struct mgmt_rp_mesh_read_features { + __le16 index; + __u8 max_handles; + __u8 used_handles; + __u8 handles[MESH_HANDLES_MAX]; +} __packed; + +#define MGMT_OP_MESH_SEND 0x0059 +struct mgmt_cp_mesh_send { + struct mgmt_addr_info addr; + __le64 instant; + __le16 delay; + __u8 cnt; + __u8 adv_data_len; + __u8 adv_data[]; +} __packed; +#define MGMT_MESH_SEND_SIZE 19 + +#define MGMT_OP_MESH_SEND_CANCEL 0x005A +struct 
mgmt_cp_mesh_send_cancel { + __u8 handle; +} __packed; +#define MGMT_MESH_SEND_CANCEL_SIZE 1 #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { @@ -936,10 +972,11 @@ struct mgmt_ev_auth_failed { __u8 status; } __packed; -#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 -#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 -#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 -#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 +#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 +#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 +#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10 #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { @@ -1103,3 +1140,35 @@ struct mgmt_ev_controller_resume { #define MGMT_WAKE_REASON_NON_BT_WAKE 0x0 #define MGMT_WAKE_REASON_UNEXPECTED 0x1 #define MGMT_WAKE_REASON_REMOTE_WAKE 0x2 + +#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND 0x002f +struct mgmt_ev_adv_monitor_device_found { + __le16 monitor_handle; + struct mgmt_addr_info addr; + __s8 rssi; + __le32 flags; + __le16 eir_len; + __u8 eir[]; +} __packed; + +#define MGMT_EV_ADV_MONITOR_DEVICE_LOST 0x0030 +struct mgmt_ev_adv_monitor_device_lost { + __le16 monitor_handle; + struct mgmt_addr_info addr; +} __packed; + +#define MGMT_EV_MESH_DEVICE_FOUND 0x0031 +struct mgmt_ev_mesh_device_found { + struct mgmt_addr_info addr; + __s8 rssi; + __le64 instant; + __le32 flags; + __le16 eir_len; + __u8 eir[]; +} __packed; + + +#define MGMT_EV_MESH_PACKET_CMPLT 0x0032 +struct mgmt_ev_mesh_pkt_cmplt { + __u8 handle; +} __packed; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 38785d48baff..a016f275cb01 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -15,8 +15,6 @@ #define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW) #define AD_TIMER_INTERVAL 100 /*msec*/ -#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02} - #define AD_LACP_SLOW 0 #define AD_LACP_FAST 1 @@ -262,7 +260,7 @@ struct ad_system { struct 
ad_bond_info { struct ad_system system; /* 802.3ad system structure */ struct bond_3ad_stats stats; - u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; @@ -290,7 +288,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state) } /* ========== AD Exported functions to the main bonding code ========== */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution); +void bond_3ad_initialize(struct bonding *bond); void bond_3ad_bind_slave(struct slave *slave); void bond_3ad_unbind_slave(struct slave *slave); void bond_3ad_state_machine_handler(struct work_struct *); diff --git a/include/net/bond_options.h b/include/net/bond_options.h index e64833a674eb..69292ecc0325 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -7,6 +7,14 @@ #ifndef _NET_BOND_OPTIONS_H #define _NET_BOND_OPTIONS_H +#include <linux/bits.h> +#include <linux/limits.h> +#include <linux/types.h> +#include <linux/string.h> + +struct netlink_ext_ack; +struct nlattr; + #define BOND_OPT_MAX_NAMELEN 32 #define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST) #define BOND_MODE_ALL_EX(x) (~(x)) @@ -65,19 +73,29 @@ enum { BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, + BOND_OPT_MISSED_MAX, + BOND_OPT_NS_TARGETS, + BOND_OPT_PRIO, BOND_OPT_LAST }; /* This structure is used for storing option values and for passing option * values when changing an option. 
The logic when used as an arg is as follows: - * - if string != NULL -> parse it, if the opt is RAW type then return it, else - * return the parse result - * - if string == NULL -> parse value + * - if value != ULLONG_MAX -> parse value + * - if string != NULL -> parse string + * - if the opt is RAW data and length less than maxlen, + * copy the data to extra storage */ + +#define BOND_OPT_EXTRA_MAXLEN 16 struct bond_opt_value { char *string; u64 value; u32 flags; + union { + char extra[BOND_OPT_EXTRA_MAXLEN]; + struct net_device *slave_dev; + }; }; struct bonding; @@ -101,7 +119,8 @@ struct bond_option { }; int __bond_opt_set(struct bonding *bond, unsigned int option, - struct bond_opt_value *val); + struct bond_opt_value *val, + struct nlattr *bad_attr, struct netlink_ext_ack *extack); int __bond_opt_set_notify(struct bonding *bond, unsigned int option, struct bond_opt_value *val); int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); @@ -117,18 +136,29 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); * When value is ULLONG_MAX then string will be used. 
*/ static inline void __bond_opt_init(struct bond_opt_value *optval, - char *string, u64 value) + char *string, u64 value, + void *extra, size_t extra_len) { memset(optval, 0, sizeof(*optval)); optval->value = ULLONG_MAX; - if (value == ULLONG_MAX) - optval->string = string; - else + if (value != ULLONG_MAX) optval->value = value; + else if (string) + optval->string = string; + + if (extra && extra_len <= BOND_OPT_EXTRA_MAXLEN) + memcpy(optval->extra, extra, extra_len); } -#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) -#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0) +#define bond_opt_initextra(optval, extra, extra_len) \ + __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) +#define bond_opt_slave_initval(optval, slave_dev, value) \ + __bond_opt_init(optval, NULL, value, slave_dev, sizeof(struct net_device *)) void bond_option_arp_ip_targets_clear(struct bonding *bond); +#if IS_ENABLED(CONFIG_IPV6) +void bond_option_ns_ip6_targets_clear(struct bonding *bond); +#endif #endif /* _NET_BOND_OPTIONS_H */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 15e083e18f75..e999f851738b 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -29,8 +29,11 @@ #include <net/bond_3ad.h> #include <net/bond_alb.h> #include <net/bond_options.h> +#include <net/ipv6.h> +#include <net/addrconf.h> #define BOND_MAX_ARP_TARGETS 16 +#define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 @@ -121,6 +124,7 @@ struct bond_params { int xmit_policy; int miimon; u8 num_peer_notif; + u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; @@ -145,6 +149,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; +#if 
IS_ENABLED(CONFIG_IPV6) + struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; +#endif /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -154,8 +161,9 @@ struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ int delay; - /* all three in jiffies */ + /* all 4 in jiffies */ unsigned long last_link_up; + unsigned long last_tx; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ @@ -171,6 +179,7 @@ struct slave { u32 speed; u16 queue_id; u8 perm_hwaddr[MAX_ADDR_LEN]; + int prio; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER @@ -345,7 +354,7 @@ static inline bool bond_uses_primary(struct bonding *bond) static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { - struct slave *slave = rcu_dereference(bond->curr_active_slave); + struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } @@ -498,6 +507,15 @@ static inline int bond_is_ip_target_ok(__be32 addr) return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } +#if IS_ENABLED(CONFIG_IPV6) +static inline int bond_is_ip6_target_ok(struct in6_addr *addr) +{ + return !ipv6_addr_any(addr) && + !ipv6_addr_loopback(addr) && + !ipv6_addr_is_multicast(addr); +} +#endif + /* Get the oldest arp which we've received on this slave for bond's * arp_targets. 
*/ @@ -523,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond, return slave->last_rx; } +static inline void slave_update_last_tx(struct slave *slave) +{ + WRITE_ONCE(slave->last_tx, jiffies); +} + +static inline unsigned long slave_last_tx(struct slave *slave) +{ + return READ_ONCE(slave->last_tx); +} + #ifdef CONFIG_NET_POLL_CONTROLLER static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, struct sk_buff *skb) @@ -627,7 +655,7 @@ struct bond_net { struct class_attribute class_attr_bonding_masters; }; -int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); +int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); @@ -698,20 +726,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, } /* Caller must hold rcu_read_lock() for read */ -static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, - const u8 *mac) -{ - struct list_head *iter; - struct slave *tmp; - - bond_for_each_slave_rcu(bond, tmp, iter) - if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) - return tmp; - - return NULL; -} - -/* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) { struct list_head *iter; @@ -748,6 +762,21 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) return -1; } +#if IS_ENABLED(CONFIG_IPV6) +static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) +{ + int i; + + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) + if (ipv6_addr_equal(&targets[i], ip)) + return i; + else if (ipv6_addr_any(&targets[i])) + break; + + return -1; +} +#endif + /* exported from bond_main.c */ extern unsigned int bond_net_id; @@ -757,9 +786,12 @@ 
extern struct rtnl_link_ops bond_link_ops; /* exported from bond_sysfs_slave.c */ extern const struct sysfs_ops slave_sysfs_ops; +/* exported from bond_3ad.c */ +extern const u8 lacpdu_mcast_addr[]; + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c4898fcbf923..f90f0021f5f2 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; static inline bool net_busy_loop_on(void) { - return sysctl_net_busy_poll; + return READ_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(const struct sock *sk) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 423f97b982ff..e09ff87146c1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -109,7 +109,12 @@ struct wiphy; * on this channel. * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted * on this channel. - * + * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band, + * this flag indicates that a 320 MHz channel cannot use this + * channel as the control or any of the secondary channels. + * This may be due to the driver or due to regulatory bandwidth + * restrictions. + * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. 
*/ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -131,6 +136,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_4MHZ = 1<<16, IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, + IEEE80211_CHAN_NO_320MHZ = 1<<19, + IEEE80211_CHAN_NO_EHT = 1<<20, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -361,6 +368,49 @@ struct ieee80211_sta_he_cap { }; /** + * struct ieee80211_eht_mcs_nss_supp - EHT max supported NSS per MCS + * + * See P802.11be_D1.3 Table 9-401k - "Subfields of the Supported EHT-MCS + * and NSS Set field" + * + * @only_20mhz: MCS/NSS support for 20 MHz-only STA. + * @bw: MCS/NSS support for 80, 160 and 320 MHz + * @bw._80: MCS/NSS support for BW <= 80 MHz + * @bw._160: MCS/NSS support for BW = 160 MHz + * @bw._320: MCS/NSS support for BW = 320 MHz + */ +struct ieee80211_eht_mcs_nss_supp { + union { + struct ieee80211_eht_mcs_nss_supp_20mhz_only only_20mhz; + struct { + struct ieee80211_eht_mcs_nss_supp_bw _80; + struct ieee80211_eht_mcs_nss_supp_bw _160; + struct ieee80211_eht_mcs_nss_supp_bw _320; + } __packed bw; + } __packed; +} __packed; + +#define IEEE80211_EHT_PPE_THRES_MAX_LEN 32 + +/** + * struct ieee80211_sta_eht_cap - STA's EHT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11be EHT capabilities for a STA. + * + * @has_eht: true iff EHT data is valid. + * @eht_cap_elem: Fixed portion of the eht capabilities element. + * @eht_mcs_nss_supp: The supported NSS/MCS combinations. + * @eht_ppe_thres: Holds the PPE Thresholds data. 
+ */ +struct ieee80211_sta_eht_cap { + bool has_eht; + struct ieee80211_eht_cap_elem_fixed eht_cap_elem; + struct ieee80211_eht_mcs_nss_supp eht_mcs_nss_supp; + u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; +}; + +/** * struct ieee80211_sband_iftype_data - sband data per interface type * * This structure encapsulates sband data that is relevant for the @@ -371,6 +421,7 @@ struct ieee80211_sta_he_cap { * @he_cap: holds the HE capabilities * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a * 6 GHz band channel (and 0 may be valid value). + * @eht_cap: STA's EHT capabilities * @vendor_elems: vendor element(s) to advertise * @vendor_elems.data: vendor element(s) data * @vendor_elems.len: vendor element(s) length @@ -379,6 +430,7 @@ struct ieee80211_sband_iftype_data { u16 types_mask; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; struct { const u8 *data; unsigned int len; @@ -445,7 +497,7 @@ struct ieee80211_edmg { * This structure describes most essential parameters needed * to describe 802.11ah S1G capabilities for a STA. 
* - * @s1g_supported: is STA an S1G STA + * @s1g: is STA an S1G STA * @cap: S1G capabilities information * @nss_mcs: Supported NSS MCS set */ @@ -562,6 +614,26 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_eht_iftype_cap - return EHT capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype + * + * Return: pointer to the struct ieee80211_sta_eht_cap, or NULL if none found + */ +static inline const struct ieee80211_sta_eht_cap * +ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband, + enum nl80211_iftype iftype) +{ + const struct ieee80211_sband_iftype_data *data = + ieee80211_get_sband_iftype_data(sband, iftype); + + if (data && data->eht_cap.has_eht) + return &data->eht_cap; + + return NULL; +} + +/** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for @@ -867,19 +939,18 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, enum nl80211_iftype iftype); /** - * ieee80211_chandef_rate_flags - returns rate flags for a channel + * ieee80211_chanwidth_rate_flags - return rate flags for channel width + * @width: the channel width of the channel * * In some channel types, not all rates may be used - for example CCK * rates may not be used in 5/10 MHz channels.
* - * @chandef: channel definition for the channel - * - * Returns: rate flags which apply for this channel + * Returns: rate flags which apply for this channel width */ static inline enum ieee80211_rate_flags -ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) +ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width) { - switch (chandef->width) { + switch (width) { case NL80211_CHAN_WIDTH_5: return IEEE80211_RATE_SUPPORTS_5MHZ; case NL80211_CHAN_WIDTH_10: @@ -891,6 +962,20 @@ ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) } /** + * ieee80211_chandef_rate_flags - returns rate flags for a channel + * @chandef: channel definition for the channel + * + * See ieee80211_chanwidth_rate_flags(). + * + * Returns: rate flags which apply for this channel + */ +static inline enum ieee80211_rate_flags +ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) +{ + return ieee80211_chanwidth_rate_flags(chandef->width); +} + +/** * ieee80211_chandef_max_power - maximum transmission power for the chandef * * In some regulations, the transmit power may depend on the configured channel @@ -991,6 +1076,7 @@ struct survey_info { }; #define CFG80211_MAX_WEP_KEYS 4 +#define CFG80211_MAX_NUM_AKM_SUITES 10 /** * struct cfg80211_crypto_settings - Crypto settings @@ -1042,7 +1128,7 @@ struct cfg80211_crypto_settings { int n_ciphers_pairwise; u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES]; int n_akm_suites; - u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; + u32 akm_suites[CFG80211_MAX_NUM_AKM_SUITES]; bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; @@ -1088,6 +1174,7 @@ struct cfg80211_mbssid_elems { /** * struct cfg80211_beacon_data - beacon data + * @link_id: the link ID for the AP MLD link sending this beacon * @head: head portion of beacon (before TIM IE) * or %NULL if not changed * @tail: tail portion of beacon (after TIM IE) @@ -1113,8 +1200,13 @@ struct cfg80211_mbssid_elems { * Token (measurement type 11) * 
@lci_len: LCI data length * @civicloc_len: Civic location data length + * @he_bss_color: BSS Color settings + * @he_bss_color_valid: indicates whether bss color + * attribute is present in beacon data or not. */ struct cfg80211_beacon_data { + unsigned int link_id; + const u8 *head, *tail; const u8 *beacon_ies; const u8 *proberesp_ies; @@ -1132,6 +1224,8 @@ struct cfg80211_beacon_data { size_t probe_resp_len; size_t lci_len; size_t civicloc_len; + struct cfg80211_he_bss_color he_bss_color; + bool he_bss_color_valid; }; struct mac_address { @@ -1188,17 +1282,6 @@ struct cfg80211_unsol_bcast_probe_resp { }; /** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - -/** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. @@ -1226,6 +1309,8 @@ enum cfg80211_ap_settings_flags { * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) * @he_cap: HE capabilities (or %NULL if HE isn't enabled) + * @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled) + * @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time @@ -1233,7 +1318,6 @@ enum cfg80211_ap_settings_flags { * @sae_h2e_required: stations must support direct H2E technique in SAE * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings - * @he_bss_color: BSS Color settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters @@ -1263,11 +1347,12 @@ struct cfg80211_ap_settings { const struct 
ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; const struct ieee80211_he_operation *he_oper; + const struct ieee80211_eht_cap_elem *eht_cap; + const struct ieee80211_eht_operation *eht_oper; bool ht_required, vht_required, he_required, sae_h2e_required; bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; - struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; @@ -1308,8 +1393,8 @@ struct cfg80211_csa_settings { * Used for bss color change * * @beacon_color_change: beacon data while performing the color countdown - * @counter_offsets_beacon: offsets of the counters within the beacon (tail) - * @counter_offsets_presp: offsets of the counters within the probe response + * @counter_offset_beacon: offsets of the counters within the beacon (tail) + * @counter_offset_presp: offsets of the counters within the probe response * @beacon_next: beacon data to be used after the color change * @count: number of beacons until the color change * @color: the color used after the change @@ -1352,6 +1437,7 @@ struct iface_combination_params { * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state + * @STATION_PARAM_APPLY_STA_TXPOWER: apply tx power for STA * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. 
@@ -1360,7 +1446,6 @@ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), STATION_PARAM_APPLY_PLINK_STATE = BIT(2), - STATION_PARAM_APPLY_STA_TXPOWER = BIT(3), }; /** @@ -1384,14 +1469,66 @@ struct sta_txpwr { }; /** - * struct station_parameters - station parameters + * struct link_station_parameters - link station parameters * - * Used to change and create a new station. + * Used to change and create a new link station. * - * @vlan: vlan interface station should belong to + * @mld_mac: MAC address of the station + * @link_id: the link id (-1 for non-MLD station) + * @link_mac: MAC address of the link * @supported_rates: supported rates in IEEE 802.11 format * (or NULL for no change) * @supported_rates_len: number of supported rates + * @ht_capa: HT capabilities of station + * @vht_capa: VHT capabilities of station + * @opmode_notif: operating mode field from Operating Mode Notification + * @opmode_notif_used: information if operating mode field is used + * @he_capa: HE capabilities of station + * @he_capa_len: the length of the HE capabilities + * @txpwr: transmit power for an associated station + * @txpwr_set: txpwr field is set + * @he_6ghz_capa: HE 6 GHz Band capabilities of station + * @eht_capa: EHT capabilities of station + * @eht_capa_len: the length of the EHT capabilities + */ +struct link_station_parameters { + const u8 *mld_mac; + int link_id; + const u8 *link_mac; + const u8 *supported_rates; + u8 supported_rates_len; + const struct ieee80211_ht_cap *ht_capa; + const struct ieee80211_vht_cap *vht_capa; + u8 opmode_notif; + bool opmode_notif_used; + const struct ieee80211_he_cap_elem *he_capa; + u8 he_capa_len; + struct sta_txpwr txpwr; + bool txpwr_set; + const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + const struct ieee80211_eht_cap_elem *eht_capa; + u8 eht_capa_len; +}; + +/** + * struct link_station_del_parameters - link station deletion parameters + * + * Used to delete a link 
station entry (or all stations). + * + * @mld_mac: MAC address of the station + * @link_id: the link id + */ +struct link_station_del_parameters { + const u8 *mld_mac; + u32 link_id; +}; + +/** + * struct station_parameters - station parameters + * + * Used to change and create a new station. + * + * @vlan: vlan interface station should belong to * @sta_flags_mask: station flags that changed * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @sta_flags_set: station flags values @@ -1402,8 +1539,6 @@ struct sta_txpwr { * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station - * @ht_capa: HT capabilities of station - * @vht_capa: VHT capabilities of station * @uapsd_queues: bitmap of queues configured for uapsd. same format * as the AC bitmap in the QoS info field * @max_sp: max Service Period. same format as the MAX_SP in the @@ -1420,17 +1555,11 @@ struct sta_txpwr { * @supported_channels_len: number of supported channels * @supported_oper_classes: supported oper classes in IEEE 802.11 format * @supported_oper_classes_len: number of supported operating classes - * @opmode_notif: operating mode field from Operating Mode Notification - * @opmode_notif_used: information if operating mode field is used * @support_p2p_ps: information if station supports P2P PS mechanism - * @he_capa: HE capabilities of station - * @he_capa_len: the length of the HE capabilities * @airtime_weight: airtime scheduler weight for this station - * @txpwr: transmit power for an associated station - * @he_6ghz_capa: HE 6 GHz Band capabilities of station + * @link_sta_params: link related params. 
*/ struct station_parameters { - const u8 *supported_rates; struct net_device *vlan; u32 sta_flags_mask, sta_flags_set; u32 sta_modify_mask; @@ -1438,11 +1567,8 @@ struct station_parameters { u16 aid; u16 vlan_id; u16 peer_aid; - u8 supported_rates_len; u8 plink_action; u8 plink_state; - const struct ieee80211_ht_cap *ht_capa; - const struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; @@ -1453,14 +1579,9 @@ struct station_parameters { u8 supported_channels_len; const u8 *supported_oper_classes; u8 supported_oper_classes_len; - u8 opmode_notif; - bool opmode_notif_used; int support_p2p_ps; - const struct ieee80211_he_cap_elem *he_capa; - u8 he_capa_len; u16 airtime_weight; - struct sta_txpwr txpwr; - const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + struct link_station_parameters link_sta_params; }; /** @@ -1538,6 +1659,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS + * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1547,6 +1669,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), + RATE_INFO_FLAGS_EHT_MCS = BIT(7), }; /** @@ -1561,6 +1684,8 @@ enum rate_info_flags { * @RATE_INFO_BW_80: 80 MHz bandwidth * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation + * @RATE_INFO_BW_320: 320 MHz bandwidth + * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation */ enum rate_info_bw { RATE_INFO_BW_20 = 0, @@ -1570,6 +1695,8 @@ enum rate_info_bw { RATE_INFO_BW_80, RATE_INFO_BW_160, RATE_INFO_BW_HE_RU, + RATE_INFO_BW_320, + RATE_INFO_BW_EHT_RU, }; /** @@ -1587,6 +1714,9 @@ enum rate_info_bw { * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only 
valid if bw is %RATE_INFO_BW_HE_RU) * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) + * @eht_gi: EHT guard interval (from &enum nl80211_eht_gi) + * @eht_ru_alloc: EHT RU allocation (from &enum nl80211_eht_ru_alloc, + * only valid if bw is %RATE_INFO_BW_EHT_RU) */ struct rate_info { u8 flags; @@ -1598,6 +1728,8 @@ struct rate_info { u8 he_dcm; u8 he_ru_alloc; u8 n_bonded_ch; + u8 eht_gi; + u8 eht_ru_alloc; }; /** @@ -2069,6 +2201,9 @@ struct bss_parameters { * @plink_timeout: If no tx activity is seen from a STA we've established * peering with for longer than this time (in seconds), then remove it * from the STA's list of peers. Default is 30 minutes. + * @dot11MeshConnectedToAuthServer: if set to true then this mesh STA + * will advertise that it is connected to an authentication server + * in the mesh formation field. * @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is * connected to a mesh gate in mesh formation info. If false, the * value in mesh formation is determined by the presence of root paths @@ -2181,6 +2316,7 @@ struct ocb_setup { * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range * 1..32767] * @aifs: Arbitration interframe space [0..255] + * @link_id: link_id or -1 for non-MLD */ struct ieee80211_txq_params { enum nl80211_ac ac; @@ -2188,6 +2324,7 @@ struct ieee80211_txq_params { u16 cwmin; u16 cwmax; u8 aifs; + int link_id; }; /** @@ -2241,12 +2378,12 @@ struct cfg80211_scan_info { /** * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only * - * @short_bssid: short ssid to scan for + * @short_ssid: short ssid to scan for * @bssid: bssid to scan for * @channel_idx: idx of the channel in the channel array in the scan request * which the above info relvant to * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU - * @short_ssid_valid: short_ssid is valid and can be used + * @short_ssid_valid: @short_ssid is valid and can be used * @psc_no_listen: when set,
and the channel is a PSC channel, no need to wait * 20 TUs before starting to send probe requests. */ @@ -2615,7 +2752,7 @@ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); */ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) { - return (void *)ieee80211_bss_get_elem(bss, id); + return (const void *)ieee80211_bss_get_elem(bss, id); } @@ -2638,6 +2775,12 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) * Authentication algorithm number, i.e., starting at the Authentication * transaction sequence number field. * @auth_data_len: Length of auth_data buffer in octets + * @link_id: if >= 0, indicates authentication should be done as an MLD, + * the interface address is included as the MLD address and the + * necessary link (with the given link_id) will be created (and + * given an MLD address) by the driver + * @ap_mld_addr: AP MLD address in case of authentication request with + * an AP MLD, valid iff @link_id >= 0 */ struct cfg80211_auth_request { struct cfg80211_bss *bss; @@ -2645,9 +2788,25 @@ struct cfg80211_auth_request { size_t ie_len; enum nl80211_auth_type auth_type; const u8 *key; - u8 key_len, key_idx; + u8 key_len; + s8 key_idx; const u8 *auth_data; size_t auth_data_len; + s8 link_id; + const u8 *ap_mld_addr; +}; + +/** + * struct cfg80211_assoc_link - per-link information for MLO association + * @bss: the BSS pointer, see also &struct cfg80211_assoc_request::bss; + * if this is %NULL for a link, that link is not requested + * @elems: extra elements for the per-STA profile for this link + * @elems_len: length of the elements + */ +struct cfg80211_assoc_link { + struct cfg80211_bss *bss; + const u8 *elems; + size_t elems_len; }; /** @@ -2661,6 +2820,10 @@ struct cfg80211_auth_request { * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). 
* @ASSOC_REQ_DISABLE_HE: Disable HE + * @ASSOC_REQ_DISABLE_EHT: Disable EHT + * @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links. + * Drivers shall disable MLO features for the current association if this + * flag is not set. */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -2668,6 +2831,8 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), ASSOC_REQ_DISABLE_HE = BIT(4), + ASSOC_REQ_DISABLE_EHT = BIT(5), + CONNECT_REQ_MLO_SUPPORT = BIT(6), }; /** @@ -2679,6 +2844,8 @@ enum cfg80211_assoc_req_flags { * given a reference that it must give back to cfg80211_send_rx_assoc() * or to cfg80211_assoc_timeout(). To ensure proper refcounting, new * association requests while already associating must be rejected. + * This also applies to the @links.bss parameter, which is used instead + * of this one (it is %NULL) for MLO associations. * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association @@ -2703,6 +2870,11 @@ enum cfg80211_assoc_req_flags { * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. 
* @s1g_capa: S1G capability override * @s1g_capa_mask: S1G capability override mask + * @links: per-link information for MLO connections + * @link_id: >= 0 for MLO connections, where links are given, and indicates + * the link on which the association request should be sent + * @ap_mld_addr: AP MLD address in case of MLO association request, + * valid iff @link_id >= 0 */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -2718,6 +2890,9 @@ struct cfg80211_assoc_request { size_t fils_kek_len; const u8 *fils_nonces; struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask; + struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS]; + const u8 *ap_mld_addr; + s8 link_id; }; /** @@ -2726,7 +2901,7 @@ struct cfg80211_assoc_request { * This structure provides information needed to complete IEEE 802.11 * deauthentication. * - * @bssid: the BSSID of the BSS to deauthenticate from + * @bssid: the BSSID or AP MLD address to deauthenticate from * @ie: Extra IEs to add to Deauthentication frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the deauthentication @@ -2747,7 +2922,7 @@ struct cfg80211_deauth_request { * This structure provides information needed to complete IEEE 802.11 * disassociation. * - * @bss: the BSS to disassociate from + * @ap_addr: the BSSID or AP MLD address to disassociate from * @ie: Extra IEs to add to Disassociation frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the disassociation @@ -2755,7 +2930,7 @@ struct cfg80211_deauth_request { * Disassociation frame is to be transmitted. 
*/ struct cfg80211_disassoc_request { - struct cfg80211_bss *bss; + const u8 *ap_addr; const u8 *ie; size_t ie_len; u16 reason_code; @@ -3200,7 +3375,7 @@ struct cfg80211_wowlan_wakeup { * @kck: key confirmation key (@kck_len bytes) * @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes) * @kek_len: length of kek - * @kck_len length of kck + * @kck_len: length of kck * @akm: akm (oui, id) */ struct cfg80211_gtk_rekey_data { @@ -3238,6 +3413,9 @@ struct cfg80211_update_ft_ies_params { * @dont_wait_for_ack: tells the low level not to wait for an ack * @n_csa_offsets: length of csa_offsets array * @csa_offsets: array of all the csa offsets in the frame + * @link_id: for MLO, the link ID to transmit on, -1 if not given; note + * that the link ID isn't validated (much), it's in range but the + * link might not exist (or be used by the receiver STA) */ struct cfg80211_mgmt_tx_params { struct ieee80211_channel *chan; @@ -3249,6 +3427,7 @@ struct cfg80211_mgmt_tx_params { bool dont_wait_for_ack; int n_csa_offsets; const u16 *csa_offsets; + int link_id; }; /** @@ -3562,6 +3741,7 @@ struct cfg80211_pmsr_ftm_result { * @type: type of the measurement reported, note that we only support reporting * one type at a time, but you can report multiple results separately and * they're all aggregated for userspace. + * @ftm: FTM result */ struct cfg80211_pmsr_result { u64 host_time, ap_tsf; @@ -3700,7 +3880,7 @@ struct cfg80211_update_owe_info { * for the entire device * @interface_stypes: bitmap of management frame subtypes registered * for the given interface - * @global_mcast_rx: mcast RX is needed globally for these subtypes + * @global_mcast_stypes: mcast RX is needed globally for these subtypes * @interface_mcast_stypes: mcast RX is needed on this interface * for these subtypes */ @@ -3745,23 +3925,39 @@ struct mgmt_frame_regs { * keep the struct wireless_dev's iftype updated. * This additionally holds the RTNL to be able to do netdev changes. 
* + * @add_intf_link: Add a new MLO link to the given interface. Note that + * the wdev->link[] data structure has been updated, so the new link + * address is available. + * @del_intf_link: Remove an MLO link from the given interface. + * * @add_key: add a key with the given parameters. @mac_addr will be %NULL - * when adding a group key. + * when adding a group key. @link_id will be -1 for non-MLO connection. + * For MLO connection, @link_id will be >= 0 for group key and -1 for + * pairwise key, @mac_addr will be peer's MLD address for MLO pairwise key. * * @get_key: get information about the key with the given parameters. * @mac_addr will be %NULL when requesting information for a group * key. All pointers given to the @callback function need not be valid * after it returns. This function should return an error if it is * not possible to retrieve the key, -ENOENT if it doesn't exist. + * @link_id will be -1 for non-MLO connection. For MLO connection, + * @link_id will be >= 0 for group key and -1 for pairwise key, @mac_addr + * will be peer's MLD address for MLO pairwise key. * * @del_key: remove a key given the @mac_addr (%NULL for a group key) - * and @key_index, return -ENOENT if the key doesn't exist. + * and @key_index, return -ENOENT if the key doesn't exist. @link_id will + * be -1 for non-MLO connection. For MLO connection, @link_id will be >= 0 + * for group key and -1 for pairwise key, @mac_addr will be peer's MLD + * address for MLO pairwise key. * - * @set_default_key: set the default key on an interface + * @set_default_key: set the default key on an interface. @link_id will be >= 0 + * for MLO connection and -1 for non-MLO connection. * - * @set_default_mgmt_key: set the default management frame key on an interface + * @set_default_mgmt_key: set the default management frame key on an interface. + * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. 
* - * @set_default_beacon_key: set the default Beacon frame key on an interface + * @set_default_beacon_key: set the default Beacon frame key on an interface. + * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * @@ -4072,6 +4268,18 @@ struct mgmt_frame_regs { * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. + * + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. + * @add_link_station: Add a link to a station. + * @mod_link_station: Modify a link of a station. + * @del_link_station: Remove a link of a station. 
*/ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4090,30 +4298,40 @@ struct cfg80211_ops { enum nl80211_iftype type, struct vif_params *params); + int (*add_intf_link)(struct wiphy *wiphy, + struct wireless_dev *wdev, + unsigned int link_id); + void (*del_intf_link)(struct wiphy *wiphy, + struct wireless_dev *wdev, + unsigned int link_id); + int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - struct key_params *params); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr, - void *cookie, + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, bool pairwise, const u8 *mac_addr); + int link_id, u8 key_index, bool pairwise, + const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct net_device *netdev, int link_id, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, struct net_device *netdev, + int link_id, u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_beacon_data *info); - int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev); + int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id); int (*add_station)(struct wiphy *wiphy, struct net_device *dev, @@ -4221,6 +4439,7 @@ struct cfg80211_ops { int (*set_bitrate_mask)(struct wiphy *wiphy, struct net_device 
*dev, + unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask); @@ -4296,6 +4515,7 @@ struct cfg80211_ops { int (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*start_p2p_device)(struct wiphy *wiphy, @@ -4332,6 +4552,7 @@ struct cfg80211_ops { struct cfg80211_qos_map *qos_map); int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, @@ -4378,7 +4599,7 @@ struct cfg80211_ops { struct net_device *dev, const u8 *buf, size_t len, const u8 *dest, const __be16 proto, - const bool noencrypt, + const bool noencrypt, int link_id, u64 *cookie); int (*get_ftm_responder_stats)(struct wiphy *wiphy, @@ -4404,6 +4625,14 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); + int (*add_link_station)(struct wiphy *wiphy, struct net_device *dev, + struct link_station_parameters *params); + int (*mod_link_station)(struct wiphy *wiphy, struct net_device *dev, + struct link_station_parameters *params); + int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev, + struct link_station_del_parameters *params); }; /* @@ -4455,10 +4684,14 @@ struct cfg80211_ops { * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation * before connection. * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys + * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs, + * in order to not have them reachable in normal drivers, until we have + * complete feature/interface combinations/etc. advertisement. No driver + * should set this flag for now. 
*/ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), - /* use hole at 1 */ + WIPHY_FLAG_SUPPORTS_MLO = BIT(1), WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), @@ -4779,19 +5012,32 @@ struct wiphy_vendor_command { * 802.11-2012 8.4.2.29 for the defined fields. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities + * @eml_capabilities: EML capabilities (for MLO) + * @mld_capa_and_ops: MLD capabilities and operations (for MLO) */ struct wiphy_iftype_ext_capab { enum nl80211_iftype iftype; const u8 *extended_capabilities; const u8 *extended_capabilities_mask; u8 extended_capabilities_len; + u16 eml_capabilities; + u16 mld_capa_and_ops; }; /** + * cfg80211_get_iftype_ext_capa - lookup interface type extended capability + * @wiphy: the wiphy to look up from + * @type: the interface type to look up + */ +const struct wiphy_iftype_ext_capab * +cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type); + +/** * struct cfg80211_pmsr_capabilities - cfg80211 peer measurement capabilities * @max_peers: maximum number of peers in a single measurement * @report_ap_tsf: can report assoc AP's TSF for radio resource measurement * @randomize_mac_addr: can randomize MAC address for measurement + * @ftm: FTM measurement data * @ftm.supported: FTM measurement is supported * @ftm.asap: ASAP-mode is supported * @ftm.non_asap: non-ASAP-mode is supported @@ -5044,6 +5290,13 @@ struct wiphy_iftype_akm_suites { * @ema_max_profile_periodicity: maximum profile periodicity supported by * the driver. Setting this field to a non-zero value indicates that the * driver supports enhanced multi-BSSID advertisements (EMA AP). + * @max_num_akm_suites: maximum number of AKM suites allowed for + * configuration through %NL80211_CMD_CONNECT, %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_START_AP. Set to NL80211_MAX_NR_AKM_SUITES if not set by + * driver. 
If set by driver minimum allowed value is + * NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with + * legacy userspace and maximum allowed value is + * CFG80211_MAX_NUM_AKM_SUITES. */ struct wiphy { struct mutex mtx; @@ -5190,6 +5443,7 @@ struct wiphy { u8 mbssid_max_interfaces; u8 ema_max_profile_periodicity; + u16 max_num_akm_suites; char priv[] __aligned(NETDEV_ALIGN); }; @@ -5415,16 +5669,9 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this * wireless device if it has no netdev - * @current_bss: (private) Used by the internal configuration code - * @chandef: (private) Used by the internal configuration code to track - * the user-set channel definition. - * @preset_chandef: (private) Used by the internal configuration code to - * track the channel to be used for AP later + * @u: union containing data specific to @iftype + * @connected: indicates if connected or not (STA mode) * @bssid: (private) Used by the internal configuration code - * @ssid: (private) Used by the internal configuration code - * @ssid_len: (private) Used by the internal configuration code - * @mesh_id_len: (private) Used by the internal configuration code - * @mesh_id_up_len: (private) Used by the internal configuration code * @wext: (private) Used by the internal wireless extensions compat code * @wext.ibss: (private) IBSS data part of wext handling * @wext.connect: (private) connection handling data @@ -5464,8 +5711,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @conn_owner_nlportid: (private) connection owner socket port ID * @disconnect_wk: (private) auto-disconnect work * @disconnect_bssid: (private) the BSSID to use for auto-disconnect - * @ibss_fixed: (private) IBSS is using fixed BSSID - * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing 
* @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID @@ -5476,6 +5721,9 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @pmsr_free_wk: (private) peer measurements cleanup work * @unprot_beacon_reported: (private) timestamp of last * unprotected beacon report + * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr + * @ap and @client for each link + * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { struct wiphy *wiphy; @@ -5497,8 +5745,6 @@ struct wireless_dev { u8 address[ETH_ALEN] __aligned(sizeof(u16)); /* currently used for IBSS and SME - might be rearranged later */ - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len, mesh_id_len, mesh_id_up_len; struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; @@ -5510,23 +5756,17 @@ struct wireless_dev { struct list_head event_list; spinlock_t event_lock; - struct cfg80211_internal_bss *current_bss; /* associated / joined */ - struct cfg80211_chan_def preset_chandef; - struct cfg80211_chan_def chandef; - - bool ibss_fixed; - bool ibss_dfs_possible; + u8 connected:1; bool ps; int ps_timeout; - int beacon_interval; - u32 ap_unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; + /* FIXME: need to rework radar detection for MLO */ bool cac_started; unsigned long cac_start_time; unsigned int cac_time_ms; @@ -5554,6 +5794,50 @@ struct wireless_dev { struct work_struct pmsr_free_wk; unsigned long unprot_beacon_reported; + + union { + struct { + u8 connected_addr[ETH_ALEN] __aligned(2); + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } client; + struct { + int beacon_interval; + struct cfg80211_chan_def preset_chandef; + struct cfg80211_chan_def chandef; + u8 id[IEEE80211_MAX_SSID_LEN]; + u8 id_len, id_up_len; + } mesh; + struct { + struct cfg80211_chan_def preset_chandef; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ap; + struct { + struct 
cfg80211_internal_bss *current_bss; + struct cfg80211_chan_def chandef; + int beacon_interval; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ibss; + struct { + struct cfg80211_chan_def chandef; + } ocb; + } u; + + struct { + u8 addr[ETH_ALEN] __aligned(2); + union { + struct { + unsigned int beacon_interval; + struct cfg80211_chan_def chandef; + } ap; + struct { + struct cfg80211_internal_bss *current_bss; + } client; + }; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; + u16 valid_links; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) @@ -5583,6 +5867,32 @@ static inline void *wdev_priv(struct wireless_dev *wdev) } /** + * wdev_chandef - return chandef pointer from wireless_dev + * @wdev: the wdev + * @link_id: the link ID for MLO + * + * Return: The chandef depending on the mode, or %NULL. + */ +struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, + unsigned int link_id); + +static inline void WARN_INVALID_LINK_ID(struct wireless_dev *wdev, + unsigned int link_id) +{ + WARN_ON(link_id && !wdev->valid_links); + WARN_ON(wdev->valid_links && + !(wdev->valid_links & BIT(link_id))); +} + +#define for_each_valid_link(link_info, link_id) \ + for (link_id = 0; \ + link_id < ((link_info)->valid_links ? \ + ARRAY_SIZE((link_info)->links) : 1); \ + link_id++) \ + if (!(link_info)->valid_links || \ + ((link_info)->valid_links & BIT(link_id))) + +/** * DOC: Utility functions * * cfg80211 offers a number of utility functions that can be useful. @@ -5858,6 +6168,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * @addr: the device MAC address * @iftype: the virtual interface type * @data_offset: offset of payload after the 802.11 header + * @is_amsdu: true if the 802.11 header is A-MSDU * Return: 0 on success. Non-zero on error. 
*/ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, @@ -5970,9 +6281,9 @@ cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len, (!match_len && match_offset))) return NULL; - return (void *)cfg80211_find_elem_match(eid, ies, len, - match, match_len, - match_offset ? + return (const void *)cfg80211_find_elem_match(eid, ies, len, + match, match_len, + match_offset ? match_offset - 2 : 0); } @@ -6099,7 +6410,7 @@ static inline const u8 * cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, unsigned int len) { - return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); + return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } /** @@ -6375,17 +6686,6 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, } /** - * cfg80211_get_ies_channel_number - returns the channel number from ies - * @ie: IEs - * @ielen: length of IEs - * @band: enum nl80211_band of the channel - * - * Returns the channel number, or -1 if none could be determined. - */ -int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band); - -/** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check * @non_inherit_element: non inheritance element @@ -6421,6 +6721,19 @@ enum cfg80211_bss_frame_type { }; /** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * @ftype: frame type + * + * Returns the channel number, or -1 if none could be determined. 
+ */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype); + +/** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS @@ -6608,16 +6921,36 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); /** - * cfg80211_rx_assoc_resp - notification of processed association response - * @dev: network device + * struct cfg80211_rx_assoc_resp - association response data * @bss: the BSS that association was requested with, ownership of the pointer - * moves to cfg80211 in this call + * moves to cfg80211 in the call to cfg80211_rx_assoc_resp() * @buf: (Re)Association Response frame (header + body) * @len: length of the frame data * @uapsd_queues: bitmap of queues configured for uapsd. Same format * as the AC bitmap in the QoS info field * @req_ies: information elements from the (Re)Association Request frame * @req_ies_len: length of req_ies data + * @ap_mld_addr: AP MLD address (in case of MLO) + * @links: per-link information indexed by link ID, use links[0] for + * non-MLO connections + */ +struct cfg80211_rx_assoc_resp { + const u8 *buf; + size_t len; + const u8 *req_ies; + size_t req_ies_len; + int uapsd_queues; + const u8 *ap_mld_addr; + struct { + const u8 *addr; + struct cfg80211_bss *bss; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; +}; + +/** + * cfg80211_rx_assoc_resp - notification of processed association response + * @dev: network device + * @data: association response data, &struct cfg80211_rx_assoc_resp * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -6625,30 +6958,32 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * This function may sleep. The caller must hold the corresponding wdev's mutex. 
*/ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_bss *bss, - const u8 *buf, size_t len, - int uapsd_queues, - const u8 *req_ies, size_t req_ies_len); + struct cfg80211_rx_assoc_resp *data); /** - * cfg80211_assoc_timeout - notification of timed out association - * @dev: network device - * @bss: The BSS entry with which association timed out. - * - * This function may sleep. The caller must hold the corresponding wdev's mutex. + * struct cfg80211_assoc_failure - association failure data + * @ap_mld_addr: AP MLD address, or %NULL + * @bss: list of BSSes, must use entry 0 for non-MLO connections + * (@ap_mld_addr is %NULL) + * @timeout: indicates the association failed due to timeout, otherwise + * the association was abandoned for a reason reported through some + * other API (e.g. deauth RX) */ -void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); +struct cfg80211_assoc_failure { + const u8 *ap_mld_addr; + struct cfg80211_bss *bss[IEEE80211_MLD_MAX_NUM_LINKS]; + bool timeout; +}; /** - * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * cfg80211_assoc_failure - notification of association failure * @dev: network device - * @bss: The BSS entry with which association was abandoned. + * @data: data describing the association failure * - * Call this whenever - for reasons reported through other API, like deauth RX, - * an association attempt was abandoned. * This function may sleep. The caller must hold the corresponding wdev's mutex. 
*/ -void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); +void cfg80211_assoc_failure(struct net_device *dev, + struct cfg80211_assoc_failure *data); /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame @@ -6725,6 +7060,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, * @macaddr: the MAC address of the new candidate * @ie: information elements advertised by the peer candidate * @ie_len: length of the information elements buffer + * @sig_dbm: signal level in dBm * @gfp: allocation flags * * This function notifies cfg80211 that the mesh peer candidate has been @@ -7089,13 +7425,6 @@ struct cfg80211_fils_resp_params { * indicate that this is a failure, but without a status code. * @timeout_reason is used to report the reason for the timeout in that * case. - * @bssid: The BSSID of the AP (may be %NULL) - * @bss: Entry of bss to which STA got connected to, can be obtained through - * cfg80211_get_bss() (may be %NULL). But it is recommended to store the - * bss from the connect_request and hold a reference to it and return - * through this param to avoid a warning if the bss is expired during the - * connection, esp. for those drivers implementing connect op. - * Only one parameter among @bssid and @bss needs to be specified. * @req_ie: Association request IEs (may be %NULL) * @req_ie_len: Association request IEs length * @resp_ie: Association response IEs (may be %NULL) @@ -7107,17 +7436,41 @@ struct cfg80211_fils_resp_params { * not known. This value is used only if @status < 0 to indicate that the * failure is due to a timeout and not due to explicit rejection by the AP. * This value is ignored in other cases (@status >= 0). + * @valid_links: For MLO connection, BIT mask of the valid link ids. Otherwise + * zero. + * @ap_mld_addr: For MLO connection, MLD address of the AP. Otherwise %NULL. 
+ * @links : For MLO connection, contains link info for the valid links indicated + * using @valid_links. For non-MLO connection, links[0] contains the + * connected AP info. + * @links.addr: For MLO connection, MAC address of the STA link. Otherwise + * %NULL. + * @links.bssid: For MLO connection, MAC address of the AP link. For non-MLO + * connection, links[0].bssid points to the BSSID of the AP (may be %NULL). + * @links.bss: For MLO connection, entry of bss to which STA link is connected. + * For non-MLO connection, links[0].bss points to entry of bss to which STA + * is connected. It can be obtained through cfg80211_get_bss() (may be + * %NULL). It is recommended to store the bss from the connect_request and + * hold a reference to it and return through this param to avoid a warning + * if the bss is expired during the connection, esp. for those drivers + * implementing connect op. Only one parameter among @bssid and @bss needs + * to be specified. */ struct cfg80211_connect_resp_params { int status; - const u8 *bssid; - struct cfg80211_bss *bss; const u8 *req_ie; size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; struct cfg80211_fils_resp_params fils; enum nl80211_timeout_reason timeout_reason; + + const u8 *ap_mld_addr; + u16 valid_links; + struct { + const u8 *addr; + const u8 *bssid; + struct cfg80211_bss *bss; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** @@ -7187,8 +7540,8 @@ cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, memset(&params, 0, sizeof(params)); params.status = status; - params.bssid = bssid; - params.bss = bss; + params.links[0].bssid = bssid; + params.links[0].bss = bss; params.req_ie = req_ie; params.req_ie_len = req_ie_len; params.resp_ie = resp_ie; @@ -7259,24 +7612,40 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, /** * struct cfg80211_roam_info - driver initiated roaming information * - * @channel: the channel of the new AP - * @bss: entry of bss to which STA got roamed (may be %NULL if %bssid
is set) - * @bssid: the BSSID of the new AP (may be %NULL if %bss is set) * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length * @fils: FILS related roaming information. + * @valid_links: For MLO roaming, BIT mask of the new valid links is set. + * Otherwise zero. + * @ap_mld_addr: For MLO roaming, MLD address of the new AP. Otherwise %NULL. + * @links : For MLO roaming, contains new link info for the valid links set in + * @valid_links. For non-MLO roaming, links[0] contains the new AP info. + * @links.addr: For MLO roaming, MAC address of the STA link. Otherwise %NULL. + * @links.bssid: For MLO roaming, MAC address of the new AP link. For non-MLO + * roaming, links[0].bssid points to the BSSID of the new AP. May be + * %NULL if %links.bss is set. + * @links.channel: the channel of the new AP. + * @links.bss: For MLO roaming, entry of new bss to which STA link got + * roamed. For non-MLO roaming, links[0].bss points to entry of bss to + * which STA got roamed (may be %NULL if %links.bssid is set) */ struct cfg80211_roam_info { - struct ieee80211_channel *channel; - struct cfg80211_bss *bss; - const u8 *bssid; const u8 *req_ie; size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; struct cfg80211_fils_resp_params fils; + + const u8 *ap_mld_addr; + u16 valid_links; + struct { + const u8 *addr; + const u8 *bssid; + struct ieee80211_channel *channel; + struct cfg80211_bss *bss; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** @@ -7443,6 +7812,48 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); /** + * struct cfg80211_rx_info - received management frame info + * + * @freq: Frequency on which the frame was received in kHz + * @sig_dbm: signal strength in dBm, or 0 if unknown + * @have_link_id: indicates the frame was received on a link of + * an MLD, i.e. 
the @link_id field is valid + * @link_id: the ID of the link the frame was received on + * @buf: Management frame (header + body) + * @len: length of the frame data + * @flags: flags, as defined in enum nl80211_rxmgmt_flags + * @rx_tstamp: Hardware timestamp of frame RX in nanoseconds + * @ack_tstamp: Hardware timestamp of ack TX in nanoseconds + */ +struct cfg80211_rx_info { + int freq; + int sig_dbm; + bool have_link_id; + u8 link_id; + const u8 *buf; + size_t len; + u32 flags; + u64 rx_tstamp; + u64 ack_tstamp; +}; + +/** + * cfg80211_rx_mgmt_ext - management frame notification with extended info + * @wdev: wireless device receiving the frame + * @info: RX info as defined in struct cfg80211_rx_info + * + * This function is called whenever an Action frame is received for a station + * mode interface, but is not processed in kernel. + * + * Return: %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. + */ +bool cfg80211_rx_mgmt_ext(struct wireless_dev *wdev, + struct cfg80211_rx_info *info); + +/** * cfg80211_rx_mgmt_khz - notification of received, unprocessed management frame * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in KHz @@ -7459,8 +7870,20 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, * action frames; %false otherwise, in which case for action frames the * driver is responsible for rejecting the frame. 
*/ -bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, - const u8 *buf, size_t len, u32 flags); +static inline bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, + int sig_dbm, const u8 *buf, size_t len, + u32 flags) +{ + struct cfg80211_rx_info info = { + .freq = freq, + .sig_dbm = sig_dbm, + .buf = buf, + .len = len, + .flags = flags + }; + + return cfg80211_rx_mgmt_ext(wdev, &info); +} /** * cfg80211_rx_mgmt - notification of received, unprocessed management frame @@ -7483,11 +7906,50 @@ static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, u32 flags) { - return cfg80211_rx_mgmt_khz(wdev, MHZ_TO_KHZ(freq), sig_dbm, buf, len, - flags); + struct cfg80211_rx_info info = { + .freq = MHZ_TO_KHZ(freq), + .sig_dbm = sig_dbm, + .buf = buf, + .len = len, + .flags = flags + }; + + return cfg80211_rx_mgmt_ext(wdev, &info); } /** + * struct cfg80211_tx_status - TX status for management frame information + * + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @tx_tstamp: hardware TX timestamp in nanoseconds + * @ack_tstamp: hardware ack RX timestamp in nanoseconds + * @buf: Management frame (header + body) + * @len: length of the frame data + * @ack: Whether frame was acknowledged + */ +struct cfg80211_tx_status { + u64 cookie; + u64 tx_tstamp; + u64 ack_tstamp; + const u8 *buf; + size_t len; + bool ack; +}; + +/** + * cfg80211_mgmt_tx_status_ext - TX status notification with extended info + * @wdev: wireless device receiving the frame + * @status: TX status data + * @gfp: context flags + * + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the + * transmission attempt with extended info. 
+ */ +void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev, + struct cfg80211_tx_status *status, gfp_t gfp); + +/** * cfg80211_mgmt_tx_status - notification of TX status for management frame * @wdev: wireless device receiving the frame * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() @@ -7500,8 +7962,19 @@ static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +static inline void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, + u64 cookie, const u8 *buf, + size_t len, bool ack, gfp_t gfp) +{ + struct cfg80211_tx_status status = { + .cookie = cookie, + .buf = buf, + .len = len, + .ack = ack + }; + + cfg80211_mgmt_tx_status_ext(wdev, &status, gfp); +} /** * cfg80211_control_port_tx_status - notification of TX status for control @@ -7594,15 +8067,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** - * cfg80211_radar_event - radar detection event + * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel + * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current chanenl. 
*/ -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, gfp_t gfp); +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp); + +static inline void +cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, false, gfp); +} + +static inline void +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, true, gfp); +} /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event @@ -7633,6 +8124,14 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event + * @wiphy: the wiphy + * + * This function is called by the driver when a Channel Availability Check + * (CAC) is aborted by a offchannel dedicated chain. + */ +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying @@ -7769,17 +8268,20 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels * @chandef: the new channel definition + * @link_id: the link ID for MLO, must be 0 for non-MLO * * Caller must acquire wdev_lock, therefore must only be called from sleepable * driver context! 
*/ void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef); + struct cfg80211_chan_def *chandef, + unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start * @dev: the device on which the channel switch started * @chandef: the future channel definition + * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP * @@ -7789,7 +8291,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, */ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - u8 count, bool quiet); + unsigned int link_id, u8 count, + bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band @@ -7893,7 +8396,9 @@ int cfg80211_register_netdevice(struct net_device *dev); */ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { +#if IS_ENABLED(CONFIG_CFG80211) cfg80211_unregister_wdev(dev->ieee80211_ptr); +#endif } /** @@ -8250,6 +8755,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @ap_addr: AP (MLD) address that rejected the assocation + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + const u8 *ap_addr, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. 
*/ /* wiphy_printk helpers, similar to dev_printk */ @@ -8335,11 +8852,12 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of + * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, + return cfg80211_bss_color_notify(dev, gfp, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ed07844eb24..d8d8719315fd 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -203,8 +203,8 @@ struct wpan_phy { /* PHY depended MAC PIB values */ - /* 802.15.4 acronym: Tdsym in usec */ - u8 symbol_duration; + /* 802.15.4 acronym: Tdsym in nsec */ + u32 symbol_duration; /* lifs and sifs periods timing */ u16 lifs_period; u16 sifs_period; @@ -227,6 +227,16 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) write_pnet(&wpan_phy->_net, net); } +/** + * struct ieee802154_addr - IEEE802.15.4 device address + * @mode: Address mode from frame header. 
Can be one of: + * - @IEEE802154_ADDR_NONE + * - @IEEE802154_ADDR_SHORT + * - @IEEE802154_ADDR_LONG + * @pan_id: The PAN ID this address belongs to + * @short_addr: address if @mode is @IEEE802154_ADDR_SHORT + * @extended_addr: address if @mode is @IEEE802154_ADDR_LONG + */ struct ieee802154_addr { u8 mode; __le16 pan_id; @@ -363,6 +373,7 @@ struct wpan_dev { #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) +#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) static inline int wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, @@ -373,6 +384,7 @@ wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len); } +#endif struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size); @@ -405,4 +417,6 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) return dev_name(&phy->dev); } +void ieee802154_configure_durations(struct wpan_phy *phy); + #endif /* __NET_CFG802154_H */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 5b96d5bd6e54..6bc783b7a06c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include <asm/checksum.h> #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { @@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY -static inline __wsum +static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); @@ -54,7 +54,7 @@ 
csum_partial_copy_nocheck(const void *src, void *dst, int len) #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,55 +75,58 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum csum_shift(__wsum sum, int offset) +#ifndef HAVE_ARCH_CSUM_SHIFT +static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ if (offset & 1) return (__force __wsum)ror32((__force u32)sum, 8); return sum; } +#endif -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { return csum_add(csum, csum_shift(csum2, offset)); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum 
sum) +static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -136,11 +139,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); @@ -150,16 +158,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum 
delta; @@ -175,9 +183,13 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } +static __always_inline __wsum wsum_negate(__wsum val) +{ + return (__force __wsum)-((__force u32)val); +} #endif diff --git a/include/net/codel.h b/include/net/codel.h index a6c9e34e62b8..5fed2f16cb8d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -44,8 +44,6 @@ #include <linux/types.h> #include <linux/ktime.h> #include <linux/skbuff.h> -#include <net/pkt_sched.h> -#include <net/inet_ecn.h> /* Controlling Queue Delay (CoDel) algorithm * ========================================= diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 137d40d8cbeb..78a27ac73070 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/inet_ecn.h> + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 098630f83a55..7d3d9219f4fe 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,9 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/codel.h> +#include <net/pkt_sched.h> + /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; diff --git a/include/net/compat.h b/include/net/compat.h index 595fee069b82..84c163f40f38 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -46,9 +46,8 @@ struct compat_rtentry { unsigned short rt_irtt; /* Initial RTT */ }; -int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, - struct sockaddr __user **save_addr, compat_uptr_t *ptr, - compat_size_t *len); +int 
__get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg, + struct sockaddr __user **save_addr); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); int put_cmsg_compat(struct msghdr*, int, int, int, void *); diff --git a/include/net/datalink.h b/include/net/datalink.h index d9b7faaa539f..c837ffc7ebf8 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -2,6 +2,13 @@ #ifndef _NET_INET_DATALINK_H_ #define _NET_INET_DATALINK_H_ +#include <linux/list.h> + +struct llc_sap; +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto { unsigned char type[8]; diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h index 43e34131a53f..02700262f71a 100644 --- a/include/net/dcbevent.h +++ b/include/net/dcbevent.h @@ -8,6 +8,8 @@ #ifndef _DCB_EVENT_H #define _DCB_EVENT_H +struct notifier_block; + enum dcbevent_notif_type { DCB_APP_EVENT = 1, }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index e4ad58c4062c..2b2d86fb3131 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -10,6 +10,8 @@ #include <linux/dcbnl.h> +struct net_device; + struct dcb_app_type { int ifindex; struct dcb_app app; diff --git a/include/net/devlink.h b/include/net/devlink.h index aab3d007c577..ba6b8b094943 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -22,6 +22,7 @@ #include <linux/firmware.h> struct devlink; +struct devlink_linecard; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". 
@@ -117,7 +118,6 @@ struct devlink_rate { struct devlink_port { struct list_head list; - struct list_head param_list; struct list_head region_list; struct devlink *devlink; unsigned int index; @@ -129,12 +129,15 @@ struct devlink_port { void *type_dev; struct devlink_port_attrs attrs; u8 attrs_set:1, - switch_port:1; + switch_port:1, + registered:1, + initialized:1; struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ struct devlink_rate *devlink_rate; + struct devlink_linecard *linecard; }; struct devlink_port_new_attrs { @@ -148,6 +151,40 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; +/** + * struct devlink_linecard_ops - Linecard operations + * @provision: callback to provision the linecard slot with certain + * type of linecard. As a result of this operation, + * driver is expected to eventually (could be after + * the function call returns) call one of: + * devlink_linecard_provision_set() + * devlink_linecard_provision_fail() + * @unprovision: callback to unprovision the linecard slot. As a result + * of this operation, driver is expected to eventually + * (could be after the function call returns) call + * devlink_linecard_provision_clear() + * devlink_linecard_provision_fail() + * @same_provision: callback to ask the driver if linecard is already + * provisioned in the same way user asks this linecard to be + * provisioned. 
+ * @types_count: callback to get number of supported types + * @types_get: callback to get next type in list + */ +struct devlink_linecard_ops { + int (*provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv, + struct netlink_ext_ack *extack); + int (*unprovision)(struct devlink_linecard *linecard, void *priv, + struct netlink_ext_ack *extack); + bool (*same_provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv); + unsigned int (*types_count)(struct devlink_linecard *linecard, + void *priv); + void (*types_get)(struct devlink_linecard *linecard, + void *priv, unsigned int index, const char **type, + const void **type_priv); +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -361,33 +398,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para typedef u64 devlink_resource_occ_get_t(void *priv); -/** - * struct devlink_resource - devlink resource - * @name: name of the resource - * @id: id, per devlink instance - * @size: size of the resource - * @size_new: updated size of the resource, reload is needed - * @size_valid: valid in case the total size of the resource is valid - * including its children - * @parent: parent resource - * @size_params: size parameters - * @list: parent list - * @resource_list: list of child resources - */ -struct devlink_resource { - const char *name; - u64 id; - u64 size; - u64 size_new; - bool size_valid; - struct devlink_resource *parent; - struct devlink_resource_size_params size_params; - struct list_head list; - struct list_head resource_list; - devlink_resource_occ_get_t *occ_get; - void *occ_get_priv; -}; - #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" @@ -485,6 +495,9 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + 
DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -534,6 +547,15 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -603,8 +625,7 @@ struct devlink_flash_update_params { u32 overwrite_mask; }; -#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT BIT(0) -#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1) +#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(0) struct devlink_region; struct devlink_info_req; @@ -687,13 +708,17 @@ struct devlink_health_reporter_ops { * @trap_name: Trap name. * @trap_group_name: Trap group name. * @input_dev: Input netdevice. + * @dev_tracker: refcount tracker for @input_dev. * @fa_cookie: Flow action user cookie. * @trap_type: Trap type. 
*/ struct devlink_trap_metadata { const char *trap_name; const char *trap_group_name; + struct net_device *input_dev; + netdevice_tracker dev_tracker; + const struct flow_action_cookie *fa_cookie; enum devlink_trap_type trap_type; }; @@ -1208,9 +1233,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, unsigned int port_index, + int (*port_split)(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, unsigned int port_index, + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, @@ -1484,12 +1509,40 @@ struct devlink_ops { struct devlink_rate *parent, void *priv_child, void *priv_parent, struct netlink_ext_ack *extack); + /** + * selftests_check() - queries if selftest is supported + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: true if test is supported by the driver + */ + bool (*selftest_check)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); + /** + * selftest_run() - Runs a selftest + * @devlink: devlink instance + * @id: test index + * @extack: extack for reporting error messages + * + * Return: status of the test + */ + enum devlink_selftest_status + (*selftest_run)(struct devlink *devlink, unsigned int id, + struct netlink_ext_ack *extack); }; void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +/* Devlink instance explicit locking */ +void devl_lock(struct devlink *devlink); +int devl_trylock(struct devlink *devlink); +void devl_unlock(struct devlink *devlink); +void devl_assert_locked(struct 
devlink *devlink); +bool devl_lock_is_held(struct devlink *devlink); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); @@ -1511,9 +1564,16 @@ void devlink_set_features(struct devlink *devlink, u64 features); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); +void devlink_port_init(struct devlink *devlink, + struct devlink_port *devlink_port); +void devlink_port_fini(struct devlink_port *devlink_port); +int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index); +void devl_port_unregister(struct devlink_port *devlink_port); void devlink_port_unregister(struct devlink_port *devlink_port); void devlink_port_type_eth_set(struct devlink_port *devlink_port, struct net_device *netdev); @@ -1529,23 +1589,42 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); -int devlink_rate_leaf_create(struct devlink_port *port, void *priv); -void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); -void devlink_rate_nodes_destroy(struct devlink *devlink); +int devl_rate_leaf_create(struct devlink_port *port, void *priv); +void devl_rate_leaf_destroy(struct devlink_port *devlink_port); +void devl_rate_nodes_destroy(struct devlink *devlink); +void devlink_port_linecard_set(struct devlink_port *devlink_port, + struct devlink_linecard *linecard); +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv); +void devlink_linecard_destroy(struct devlink_linecard *linecard); +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char 
*type); +void devlink_linecard_provision_clear(struct devlink_linecard *linecard); +void devlink_linecard_provision_fail(struct devlink_linecard *linecard); +void devlink_linecard_activate(struct devlink_linecard *linecard); +void devlink_linecard_deactivate(struct devlink_linecard *linecard); +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); +int devl_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count); +void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); -int devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern); -void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name); -int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers *dpipe_headers); -void devlink_dpipe_headers_unregister(struct devlink *devlink); +int devl_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, bool counter_control_extern); +void devl_dpipe_table_unregister(struct devlink *devlink, + const char *table_name); +void devl_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers); +void devl_dpipe_headers_unregister(struct devlink *devlink); bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, const char *table_name); int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); @@ -1561,24 +1640,40 @@ extern struct devlink_dpipe_header 
devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; +int devl_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params); int devlink_resource_register(struct devlink *devlink, const char *resource_name, u64 resource_size, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); -void devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource); -int devlink_resource_size_get(struct devlink *devlink, - u64 resource_id, - u64 *p_resource_size); +void devl_resources_unregister(struct devlink *devlink); +void devlink_resources_unregister(struct devlink *devlink); +int devl_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size); +int devl_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units); int devlink_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); +void devl_resource_occ_get_register(struct devlink *devlink, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv); void devlink_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv); +void devl_resource_occ_get_unregister(struct devlink *devlink, + u64 resource_id); + void devlink_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id); int devlink_params_register(struct devlink *devlink, @@ -1596,6 +1691,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink 
*devlink, u32 param_id); +struct devlink_region *devl_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, + u64 region_size); struct devlink_region * devlink_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, @@ -1604,6 +1703,7 @@ struct devlink_region * devlink_port_region_create(struct devlink_port *port, const struct devlink_port_region_ops *ops, u32 region_max_snapshots, u64 region_size); +void devl_region_destroy(struct devlink_region *region); void devlink_region_destroy(struct devlink_region *region); void devlink_port_region_destroy(struct devlink_region *region); @@ -1617,15 +1717,31 @@ int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); + +enum devlink_info_version_type { + DEVLINK_INFO_VERSION_TYPE_NONE, + DEVLINK_INFO_VERSION_TYPE_COMPONENT, /* May be used as flash update + * component by name. 
+ */ +}; + int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_name, const char *version_value); int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_name, const char *version_value); +int devlink_info_version_stored_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type); int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value); +int devlink_info_version_running_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type); int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); @@ -1699,9 +1815,15 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink, const char *component, unsigned long timeout); +int devl_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv); int devlink_traps_register(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count, void *priv); +void devl_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count); void devlink_traps_unregister(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count); @@ -1709,20 +1831,26 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, void *trap_ctx, struct devlink_port *in_devlink_port, const struct flow_action_cookie *fa_cookie); void *devlink_trap_ctx_priv(void *trap_ctx); +int devl_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count); int devlink_trap_groups_register(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count); +void devl_trap_groups_unregister(struct devlink *devlink, + const struct 
devlink_trap_group *groups, + size_t groups_count); void devlink_trap_groups_unregister(struct devlink *devlink, const struct devlink_trap_group *groups, size_t groups_count); int -devlink_trap_policers_register(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count); +devl_trap_policers_register(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count); void -devlink_trap_policers_unregister(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count); +devl_trap_policers_unregister(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count); #if IS_ENABLED(CONFIG_NET_DEVLINK) diff --git a/include/net/dn.h b/include/net/dn.h deleted file mode 100644 index ba9655b0098a..000000000000 --- a/include/net/dn.h +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_H -#define _NET_DN_H - -#include <linux/dn.h> -#include <net/sock.h> -#include <net/flow.h> -#include <asm/byteorder.h> -#include <asm/unaligned.h> - -struct dn_scp /* Session Control Port */ -{ - unsigned char state; -#define DN_O 1 /* Open */ -#define DN_CR 2 /* Connect Receive */ -#define DN_DR 3 /* Disconnect Reject */ -#define DN_DRC 4 /* Discon. Rej. 
Complete*/ -#define DN_CC 5 /* Connect Confirm */ -#define DN_CI 6 /* Connect Initiate */ -#define DN_NR 7 /* No resources */ -#define DN_NC 8 /* No communication */ -#define DN_CD 9 /* Connect Delivery */ -#define DN_RJ 10 /* Rejected */ -#define DN_RUN 11 /* Running */ -#define DN_DI 12 /* Disconnect Initiate */ -#define DN_DIC 13 /* Disconnect Complete */ -#define DN_DN 14 /* Disconnect Notificat */ -#define DN_CL 15 /* Closed */ -#define DN_CN 16 /* Closed Notification */ - - __le16 addrloc; - __le16 addrrem; - __u16 numdat; - __u16 numoth; - __u16 numoth_rcv; - __u16 numdat_rcv; - __u16 ackxmt_dat; - __u16 ackxmt_oth; - __u16 ackrcv_dat; - __u16 ackrcv_oth; - __u8 flowrem_sw; - __u8 flowloc_sw; -#define DN_SEND 2 -#define DN_DONTSEND 1 -#define DN_NOCHANGE 0 - __u16 flowrem_dat; - __u16 flowrem_oth; - __u16 flowloc_dat; - __u16 flowloc_oth; - __u8 services_rem; - __u8 services_loc; - __u8 info_rem; - __u8 info_loc; - - __u16 segsize_rem; - __u16 segsize_loc; - - __u8 nonagle; - __u8 multi_ireq; - __u8 accept_mode; - unsigned long seg_total; /* Running total of current segment */ - - struct optdata_dn conndata_in; - struct optdata_dn conndata_out; - struct optdata_dn discdata_in; - struct optdata_dn discdata_out; - struct accessdata_dn accessdata; - - struct sockaddr_dn addr; /* Local address */ - struct sockaddr_dn peer; /* Remote address */ - - /* - * In this case the RTT estimation is not specified in the - * docs, nor is any back off algorithm. Here we follow well - * known tcp algorithms with a few small variations. - * - * snd_window: Max number of packets we send before we wait for - * an ack to come back. This will become part of a - * more complicated scheme when we support flow - * control. - * - * nsp_srtt: Round-Trip-Time (x8) in jiffies. This is a rolling - * average. - * nsp_rttvar: Round-Trip-Time-Varience (x4) in jiffies. 
This is the - * varience of the smoothed average (but calculated in - * a simpler way than for normal statistical varience - * calculations). - * - * nsp_rxtshift: Backoff counter. Value is zero normally, each time - * a packet is lost is increases by one until an ack - * is received. Its used to index an array of backoff - * multipliers. - */ -#define NSP_MIN_WINDOW 1 -#define NSP_MAX_WINDOW (0x07fe) - unsigned long max_window; - unsigned long snd_window; -#define NSP_INITIAL_SRTT (HZ) - unsigned long nsp_srtt; -#define NSP_INITIAL_RTTVAR (HZ*3) - unsigned long nsp_rttvar; -#define NSP_MAXRXTSHIFT 12 - unsigned long nsp_rxtshift; - - /* - * Output queues, one for data, one for otherdata/linkservice - */ - struct sk_buff_head data_xmit_queue; - struct sk_buff_head other_xmit_queue; - - /* - * Input queue for other data - */ - struct sk_buff_head other_receive_queue; - int other_report; - - /* - * Stuff to do with the slow timer - */ - unsigned long stamp; /* time of last transmit */ - unsigned long persist; - int (*persist_fxn)(struct sock *sk); - unsigned long keepalive; - void (*keepalive_fxn)(struct sock *sk); - -}; - -static inline struct dn_scp *DN_SK(struct sock *sk) -{ - return (struct dn_scp *)(sk + 1); -} - -/* - * src,dst : Source and Destination DECnet addresses - * hops : Number of hops through the network - * dst_port, src_port : NSP port numbers - * services, info : Useful data extracted from conninit messages - * rt_flags : Routing flags byte - * nsp_flags : NSP layer flags byte - * segsize : Size of segment - * segnum : Number, for data, otherdata and linkservice - * xmit_count : Number of times we've transmitted this skb - * stamp : Time stamp of most recent transmission, used in RTT calculations - * iif: Input interface number - * - * As a general policy, this structure keeps all addresses in network - * byte order, and all else in host byte order. Thus dst, src, dst_port - * and src_port are in network order. All else is in host order. 
- * - */ -#define DN_SKB_CB(skb) ((struct dn_skb_cb *)(skb)->cb) -struct dn_skb_cb { - __le16 dst; - __le16 src; - __u16 hops; - __le16 dst_port; - __le16 src_port; - __u8 services; - __u8 info; - __u8 rt_flags; - __u8 nsp_flags; - __u16 segsize; - __u16 segnum; - __u16 xmit_count; - unsigned long stamp; - int iif; -}; - -static inline __le16 dn_eth2dn(const unsigned char *ethaddr) -{ - return get_unaligned((__le16 *)(ethaddr + 4)); -} - -static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) -{ - return *(__le16 *)saddr->sdn_nodeaddr; -} - -static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr) -{ - __u16 a = le16_to_cpu(addr); - ethaddr[0] = 0xAA; - ethaddr[1] = 0x00; - ethaddr[2] = 0x04; - ethaddr[3] = 0x00; - ethaddr[4] = (__u8)(a & 0xff); - ethaddr[5] = (__u8)(a >> 8); -} - -static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp) -{ - fld->fld_sport = scp->addrloc; - fld->fld_dport = scp->addrrem; -} - -unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu); -void dn_register_sysctl(void); -void dn_unregister_sysctl(void); - -#define DN_MENUVER_ACC 0x01 -#define DN_MENUVER_USR 0x02 -#define DN_MENUVER_PRX 0x04 -#define DN_MENUVER_UIC 0x08 - -struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr); -struct sock *dn_find_by_skb(struct sk_buff *skb); -#define DN_ASCBUF_LEN 9 -char *dn_addr2asc(__u16, char *); -int dn_destroy_timer(struct sock *sk); - -int dn_sockaddr2username(struct sockaddr_dn *addr, unsigned char *buf, - unsigned char type); -int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *addr, - unsigned char *type); - -void dn_start_slow_timer(struct sock *sk); -void dn_stop_slow_timer(struct sock *sk); - -extern __le16 decnet_address; -extern int decnet_debug_level; -extern int decnet_time_wait; -extern int decnet_dn_count; -extern int decnet_di_count; -extern int decnet_dr_count; -extern int decnet_no_fc_max_cwnd; - -extern long sysctl_decnet_mem[3]; -extern int 
sysctl_decnet_wmem[3]; -extern int sysctl_decnet_rmem[3]; - -#endif /* _NET_DN_H */ diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h deleted file mode 100644 index 595b4f6c1eb1..000000000000 --- a/include/net/dn_dev.h +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_DEV_H -#define _NET_DN_DEV_H - - -struct dn_dev; - -struct dn_ifaddr { - struct dn_ifaddr __rcu *ifa_next; - struct dn_dev *ifa_dev; - __le16 ifa_local; - __le16 ifa_address; - __u32 ifa_flags; - __u8 ifa_scope; - char ifa_label[IFNAMSIZ]; - struct rcu_head rcu; -}; - -#define DN_DEV_S_RU 0 /* Run - working normally */ -#define DN_DEV_S_CR 1 /* Circuit Rejected */ -#define DN_DEV_S_DS 2 /* Data Link Start */ -#define DN_DEV_S_RI 3 /* Routing Layer Initialize */ -#define DN_DEV_S_RV 4 /* Routing Layer Verify */ -#define DN_DEV_S_RC 5 /* Routing Layer Complete */ -#define DN_DEV_S_OF 6 /* Off */ -#define DN_DEV_S_HA 7 /* Halt */ - - -/* - * The dn_dev_parms structure contains the set of parameters - * for each device (hence inclusion in the dn_dev structure) - * and an array is used to store the default types of supported - * device (in dn_dev.c). - * - * The type field matches the ARPHRD_ constants and is used in - * searching the list for supported devices when new devices - * come up. - * - * The mode field is used to find out if a device is broadcast, - * multipoint, or pointopoint. Please note that DECnet thinks - * different ways about devices to the rest of the kernel - * so the normal IFF_xxx flags are invalid here. For devices - * which can be any combination of the previously mentioned - * attributes, you can set this on a per device basis by - * installing an up() routine. - * - * The device state field, defines the initial state in which the - * device will come up. In the dn_dev structure, it is the actual - * state. - * - * Things have changed here. 
I've killed timer1 since it's a user space - * issue for a user space routing deamon to sort out. The kernel does - * not need to be bothered with it. - * - * Timers: - * t2 - Rate limit timer, min time between routing and hello messages - * t3 - Hello timer, send hello messages when it expires - * - * Callbacks: - * up() - Called to initialize device, return value can veto use of - * device with DECnet. - * down() - Called to turn device off when it goes down - * timer3() - Called once for each ifaddr when timer 3 goes off - * - * sysctl - Hook for sysctl things - * - */ -struct dn_dev_parms { - int type; /* ARPHRD_xxx */ - int mode; /* Broadcast, Unicast, Mulitpoint */ -#define DN_DEV_BCAST 1 -#define DN_DEV_UCAST 2 -#define DN_DEV_MPOINT 4 - int state; /* Initial state */ - int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */ - unsigned long t2; /* Default value of t2 */ - unsigned long t3; /* Default value of t3 */ - int priority; /* Priority to be a router */ - char *name; /* Name for sysctl */ - int (*up)(struct net_device *); - void (*down)(struct net_device *); - void (*timer3)(struct net_device *, struct dn_ifaddr *ifa); - void *sysctl; -}; - - -struct dn_dev { - struct dn_ifaddr __rcu *ifa_list; - struct net_device *dev; - struct dn_dev_parms parms; - char use_long; - struct timer_list timer; - unsigned long t3; - struct neigh_parms *neigh_parms; - __u8 addr[ETH_ALEN]; - struct neighbour *router; /* Default router on circuit */ - struct neighbour *peer; /* Peer on pointopoint links */ - unsigned long uptime; /* Time device went up in jiffies */ -}; - -struct dn_short_packet { - __u8 msgflg; - __le16 dstnode; - __le16 srcnode; - __u8 forward; -} __packed; - -struct dn_long_packet { - __u8 msgflg; - __u8 d_area; - __u8 d_subarea; - __u8 d_id[6]; - __u8 s_area; - __u8 s_subarea; - __u8 s_id[6]; - __u8 nl2; - __u8 visit_ct; - __u8 s_class; - __u8 pt; -} __packed; - -/*------------------------- DRP - Routing messages ---------------------*/ - -struct 
endnode_hello_message { - __u8 msgflg; - __u8 tiver[3]; - __u8 id[6]; - __u8 iinfo; - __le16 blksize; - __u8 area; - __u8 seed[8]; - __u8 neighbor[6]; - __le16 timer; - __u8 mpd; - __u8 datalen; - __u8 data[2]; -} __packed; - -struct rtnode_hello_message { - __u8 msgflg; - __u8 tiver[3]; - __u8 id[6]; - __u8 iinfo; - __le16 blksize; - __u8 priority; - __u8 area; - __le16 timer; - __u8 mpd; -} __packed; - - -void dn_dev_init(void); -void dn_dev_cleanup(void); - -int dn_dev_ioctl(unsigned int cmd, void __user *arg); - -void dn_dev_devices_off(void); -void dn_dev_devices_on(void); - -void dn_dev_init_pkt(struct sk_buff *skb); -void dn_dev_veri_pkt(struct sk_buff *skb); -void dn_dev_hello(struct sk_buff *skb); - -void dn_dev_up(struct net_device *); -void dn_dev_down(struct net_device *); - -int dn_dev_set_default(struct net_device *dev, int force); -struct net_device *dn_dev_get_default(void); -int dn_dev_bind_default(__le16 *addr); - -int register_dnaddr_notifier(struct notifier_block *nb); -int unregister_dnaddr_notifier(struct notifier_block *nb); - -static inline int dn_dev_islocal(struct net_device *dev, __le16 addr) -{ - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int res = 0; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) { - printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n"); - goto out; - } - - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa != NULL; - ifa = rcu_dereference(ifa->ifa_next)) - if ((addr ^ ifa->ifa_local) == 0) { - res = 1; - break; - } -out: - rcu_read_unlock(); - return res; -} - -#endif /* _NET_DN_DEV_H */ diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h deleted file mode 100644 index ddd6565957b3..000000000000 --- a/include/net/dn_fib.h +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_FIB_H -#define _NET_DN_FIB_H - -#include <linux/netlink.h> -#include <linux/refcount.h> - -extern const struct nla_policy rtm_dn_policy[]; - -struct 
dn_fib_res { - struct fib_rule *r; - struct dn_fib_info *fi; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; -}; - -struct dn_fib_nh { - struct net_device *nh_dev; - unsigned int nh_flags; - unsigned char nh_scope; - int nh_weight; - int nh_power; - int nh_oif; - __le16 nh_gw; -}; - -struct dn_fib_info { - struct dn_fib_info *fib_next; - struct dn_fib_info *fib_prev; - refcount_t fib_treeref; - refcount_t fib_clntref; - int fib_dead; - unsigned int fib_flags; - int fib_protocol; - __le16 fib_prefsrc; - __u32 fib_priority; - __u32 fib_metrics[RTAX_MAX]; - int fib_nhs; - int fib_power; - struct dn_fib_nh fib_nh[0]; -#define dn_fib_dev fib_nh[0].nh_dev -}; - - -#define DN_FIB_RES_RESET(res) ((res).nh_sel = 0) -#define DN_FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) - -#define DN_FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __dn_fib_res_prefsrc(&res)) -#define DN_FIB_RES_GW(res) (DN_FIB_RES_NH(res).nh_gw) -#define DN_FIB_RES_DEV(res) (DN_FIB_RES_NH(res).nh_dev) -#define DN_FIB_RES_OIF(res) (DN_FIB_RES_NH(res).nh_oif) - -typedef struct { - __le16 datum; -} dn_fib_key_t; - -typedef struct { - __le16 datum; -} dn_fib_hash_t; - -typedef struct { - __u16 datum; -} dn_fib_idx_t; - -struct dn_fib_node { - struct dn_fib_node *fn_next; - struct dn_fib_info *fn_info; -#define DN_FIB_INFO(f) ((f)->fn_info) - dn_fib_key_t fn_key; - u8 fn_type; - u8 fn_scope; - u8 fn_state; -}; - - -struct dn_fib_table { - struct hlist_node hlist; - u32 n; - - int (*insert)(struct dn_fib_table *t, struct rtmsg *r, - struct nlattr *attrs[], struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*delete)(struct dn_fib_table *t, struct rtmsg *r, - struct nlattr *attrs[], struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld, - struct dn_fib_res *res); - int (*flush)(struct dn_fib_table *t); - int (*dump)(struct dn_fib_table *t, struct sk_buff *skb, struct netlink_callback 
*cb); - - unsigned char data[]; -}; - -#ifdef CONFIG_DECNET_ROUTER -/* - * dn_fib.c - */ -void dn_fib_init(void); -void dn_fib_cleanup(void); - -int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, - struct nlattr *attrs[], - const struct nlmsghdr *nlh, int *errp); -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, - const struct flowidn *fld, struct dn_fib_res *res); -void dn_fib_release_info(struct dn_fib_info *fi); -void dn_fib_flush(void); -void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res); - -/* - * dn_tables.c - */ -struct dn_fib_table *dn_fib_get_table(u32 n, int creat); -struct dn_fib_table *dn_fib_empty_table(void); -void dn_fib_table_init(void); -void dn_fib_table_cleanup(void); - -/* - * dn_rules.c - */ -void dn_fib_rules_init(void); -void dn_fib_rules_cleanup(void); -unsigned int dnet_addr_type(__le16 addr); -int dn_fib_lookup(struct flowidn *fld, struct dn_fib_res *res); - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb); - -void dn_fib_free_info(struct dn_fib_info *fi); - -static inline void dn_fib_info_put(struct dn_fib_info *fi) -{ - if (refcount_dec_and_test(&fi->fib_clntref)) - dn_fib_free_info(fi); -} - -static inline void dn_fib_res_put(struct dn_fib_res *res) -{ - if (res->fi) - dn_fib_info_put(res->fi); - if (res->r) - fib_rule_put(res->r); -} - -#else /* Endnode */ - -#define dn_fib_init() do { } while(0) -#define dn_fib_cleanup() do { } while(0) - -#define dn_fib_lookup(fl, res) (-ESRCH) -#define dn_fib_info_put(fi) do { } while(0) -#define dn_fib_select_multipath(fl, res) do { } while(0) -#define dn_fib_rules_policy(saddr,res,flags) (0) -#define dn_fib_res_put(res) do { } while(0) - -#endif /* CONFIG_DECNET_ROUTER */ - -static inline __le16 dnet_make_mask(int n) -{ - if (n) - return cpu_to_le16(~((1 << (16 - n)) - 1)); - return cpu_to_le16(0); -} - -#endif /* _NET_DN_FIB_H */ diff --git 
a/include/net/dn_neigh.h b/include/net/dn_neigh.h deleted file mode 100644 index 2e3e7793973a..000000000000 --- a/include/net/dn_neigh.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_DN_NEIGH_H -#define _NET_DN_NEIGH_H - -/* - * The position of the first two fields of - * this structure are critical - SJW - */ -struct dn_neigh { - struct neighbour n; - __le16 addr; - unsigned long flags; -#define DN_NDFLAG_R1 0x0001 /* Router L1 */ -#define DN_NDFLAG_R2 0x0002 /* Router L2 */ -#define DN_NDFLAG_P3 0x0004 /* Phase III Node */ - unsigned long blksize; - __u8 priority; -}; - -void dn_neigh_init(void); -void dn_neigh_cleanup(void); -int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb); -int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb); -void dn_neigh_pointopoint_hello(struct sk_buff *skb); -int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n); -int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb); - -extern struct neigh_table dn_neigh_table; - -#endif /* _NET_DN_NEIGH_H */ diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h deleted file mode 100644 index f83932b864a9..000000000000 --- a/include/net/dn_nsp.h +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_DN_NSP_H -#define _NET_DN_NSP_H -/****************************************************************************** - (c) 1995-1998 E.M. 
Serrat emserrat@geocities.com - -*******************************************************************************/ -/* dn_nsp.c functions prototyping */ - -void dn_nsp_send_data_ack(struct sock *sk); -void dn_nsp_send_oth_ack(struct sock *sk); -void dn_send_conn_ack(struct sock *sk); -void dn_send_conn_conf(struct sock *sk, gfp_t gfp); -void dn_nsp_send_disc(struct sock *sk, unsigned char type, - unsigned short reason, gfp_t gfp); -void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type, - unsigned short reason); -void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval); -void dn_nsp_send_conninit(struct sock *sk, unsigned char flags); - -void dn_nsp_output(struct sock *sk); -int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *q, unsigned short acknum); -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp, - int oob); -unsigned long dn_nsp_persist(struct sock *sk); -int dn_nsp_xmit_timeout(struct sock *sk); - -int dn_nsp_rx(struct sk_buff *); -int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb); - -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); -struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, - long timeo, int *err); - -#define NSP_REASON_OK 0 /* No error */ -#define NSP_REASON_NR 1 /* No resources */ -#define NSP_REASON_UN 2 /* Unrecognised node name */ -#define NSP_REASON_SD 3 /* Node shutting down */ -#define NSP_REASON_ID 4 /* Invalid destination end user */ -#define NSP_REASON_ER 5 /* End user lacks resources */ -#define NSP_REASON_OB 6 /* Object too busy */ -#define NSP_REASON_US 7 /* Unspecified error */ -#define NSP_REASON_TP 8 /* Third-Party abort */ -#define NSP_REASON_EA 9 /* End user has aborted the link */ -#define NSP_REASON_IF 10 /* Invalid node name format */ -#define NSP_REASON_LS 11 /* Local node shutdown */ -#define NSP_REASON_LL 32 /* Node lacks logical-link resources */ -#define NSP_REASON_LE 33 /* End user 
lacks logical-link resources */ -#define NSP_REASON_UR 34 /* Unacceptable RQSTRID or PASSWORD field */ -#define NSP_REASON_UA 36 /* Unacceptable ACCOUNT field */ -#define NSP_REASON_TM 38 /* End user timed out logical link */ -#define NSP_REASON_NU 39 /* Node unreachable */ -#define NSP_REASON_NL 41 /* No-link message */ -#define NSP_REASON_DC 42 /* Disconnect confirm */ -#define NSP_REASON_IO 43 /* Image data field overflow */ - -#define NSP_DISCINIT 0x38 -#define NSP_DISCCONF 0x48 - -/*------------------------- NSP - messages ------------------------------*/ -/* Data Messages */ -/*---------------*/ - -/* Data Messages (data segment/interrupt/link service) */ - -struct nsp_data_seg_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; -} __packed; - -struct nsp_data_opt_msg { - __le16 acknum; - __le16 segnum; - __le16 lsflgs; -} __packed; - -struct nsp_data_opt_msg1 { - __le16 acknum; - __le16 segnum; -} __packed; - - -/* Acknowledgment Message (data/other data) */ -struct nsp_data_ack_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; - __le16 acknum; -} __packed; - -/* Connect Acknowledgment Message */ -struct nsp_conn_ack_msg { - __u8 msgflg; - __le16 dstaddr; -} __packed; - - -/* Connect Initiate/Retransmit Initiate/Connect Confirm */ -struct nsp_conn_init_msg { - __u8 msgflg; -#define NSP_CI 0x18 /* Connect Initiate */ -#define NSP_RCI 0x68 /* Retrans. Conn Init */ - __le16 dstaddr; - __le16 srcaddr; - __u8 services; -#define NSP_FC_NONE 0x00 /* Flow Control None */ -#define NSP_FC_SRC 0x04 /* Seg Req. Count */ -#define NSP_FC_SCMC 0x08 /* Sess. 
Control Mess */ -#define NSP_FC_MASK 0x0c /* FC type mask */ - __u8 info; - __le16 segsize; -} __packed; - -/* Disconnect Initiate/Disconnect Confirm */ -struct nsp_disconn_init_msg { - __u8 msgflg; - __le16 dstaddr; - __le16 srcaddr; - __le16 reason; -} __packed; - - - -struct srcobj_fmt { - __u8 format; - __u8 task; - __le16 grpcode; - __le16 usrcode; - __u8 dlen; -} __packed; - -/* - * A collection of functions for manipulating the sequence - * numbers used in NSP. Similar in operation to the functions - * of the same name in TCP. - */ -static __inline__ int dn_before(__u16 seq1, __u16 seq2) -{ - seq1 &= 0x0fff; - seq2 &= 0x0fff; - - return (int)((seq1 - seq2) & 0x0fff) > 2048; -} - - -static __inline__ int dn_after(__u16 seq1, __u16 seq2) -{ - seq1 &= 0x0fff; - seq2 &= 0x0fff; - - return (int)((seq2 - seq1) & 0x0fff) > 2048; -} - -static __inline__ int dn_equal(__u16 seq1, __u16 seq2) -{ - return ((seq1 ^ seq2) & 0x0fff) == 0; -} - -static __inline__ int dn_before_or_equal(__u16 seq1, __u16 seq2) -{ - return (dn_before(seq1, seq2) || dn_equal(seq1, seq2)); -} - -static __inline__ void seq_add(__u16 *seq, __u16 off) -{ - (*seq) += off; - (*seq) &= 0x0fff; -} - -static __inline__ int seq_next(__u16 seq1, __u16 seq2) -{ - return dn_equal(seq1 + 1, seq2); -} - -/* - * Can we delay the ack ? - */ -static __inline__ int sendack(__u16 seq) -{ - return (int)((seq & 0x1000) ? 0 : 1); -} - -/* - * Is socket congested ? 
- */ -static __inline__ int dn_congested(struct sock *sk) -{ - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); -} - -#define DN_MAX_NSP_DATA_HEADER (11) - -#endif /* _NET_DN_NSP_H */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h deleted file mode 100644 index 6f1e94ac0bdf..000000000000 --- a/include/net/dn_route.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_DN_ROUTE_H -#define _NET_DN_ROUTE_H - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - -*******************************************************************************/ - -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); -int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *, - struct sock *sk, int flags); -int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); -void dn_rt_cache_flush(int delay); -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); - -/* Masks for flags field */ -#define DN_RT_F_PID 0x07 /* Mask for packet type */ -#define DN_RT_F_PF 0x80 /* Padding Follows */ -#define DN_RT_F_VER 0x40 /* Version =0 discard packet if ==1 */ -#define DN_RT_F_IE 0x20 /* Intra Ethernet, Reserved in short pkt */ -#define DN_RT_F_RTS 0x10 /* Packet is being returned to sender */ -#define DN_RT_F_RQR 0x08 /* Return packet to sender upon non-delivery */ - -/* Mask for types of routing packets */ -#define DN_RT_PKT_MSK 0x06 -/* Types of routing packets */ -#define DN_RT_PKT_SHORT 0x02 /* Short routing packet */ -#define DN_RT_PKT_LONG 0x06 /* Long routing packet */ - -/* Mask for control/routing selection */ -#define DN_RT_PKT_CNTL 0x01 /* Set to 1 if a control packet */ -/* Types of control packets */ -#define DN_RT_CNTL_MSK 0x0f /* Mask for control packets */ -#define DN_RT_PKT_INIT 0x01 /* Initialisation packet */ -#define DN_RT_PKT_VERI 
0x03 /* Verification Message */ -#define DN_RT_PKT_HELO 0x05 /* Hello and Test Message */ -#define DN_RT_PKT_L1RT 0x07 /* Level 1 Routing Message */ -#define DN_RT_PKT_L2RT 0x09 /* Level 2 Routing Message */ -#define DN_RT_PKT_ERTH 0x0b /* Ethernet Router Hello */ -#define DN_RT_PKT_EEDH 0x0d /* Ethernet EndNode Hello */ - -/* Values for info field in hello message */ -#define DN_RT_INFO_TYPE 0x03 /* Type mask */ -#define DN_RT_INFO_L1RT 0x02 /* L1 Router */ -#define DN_RT_INFO_L2RT 0x01 /* L2 Router */ -#define DN_RT_INFO_ENDN 0x03 /* EndNode */ -#define DN_RT_INFO_VERI 0x04 /* Verification Reqd. */ -#define DN_RT_INFO_RJCT 0x08 /* Reject Flag, Reserved */ -#define DN_RT_INFO_VFLD 0x10 /* Verification Failed, Reserved */ -#define DN_RT_INFO_NOML 0x20 /* No Multicast traffic accepted */ -#define DN_RT_INFO_BLKR 0x40 /* Blocking Requested */ - -/* - * The fl structure is what we used to look up the route. - * The rt_saddr & rt_daddr entries are the same as key.saddr & key.daddr - * except for local input routes, where the rt_saddr = fl.fld_dst and - * rt_daddr = fl.fld_src to allow the route to be used for returning - * packets to the originating host. 
- */ -struct dn_route { - struct dst_entry dst; - struct dn_route __rcu *dn_next; - - struct neighbour *n; - - struct flowidn fld; - - __le16 rt_saddr; - __le16 rt_daddr; - __le16 rt_gateway; - __le16 rt_local_src; /* Source used for forwarding packets */ - __le16 rt_src_map; - __le16 rt_dst_map; - - unsigned int rt_flags; - unsigned int rt_type; -}; - -static inline bool dn_is_input_route(struct dn_route *rt) -{ - return rt->fld.flowidn_iif != 0; -} - -static inline bool dn_is_output_route(struct dn_route *rt) -{ - return rt->fld.flowidn_iif == 0; -} - -void dn_route_init(void); -void dn_route_cleanup(void); - -#include <net/sock.h> -#include <linux/if_arp.h> - -static inline void dn_rt_send(struct sk_buff *skb) -{ - dev_queue_xmit(skb); -} - -static inline void dn_rt_finish_output(struct sk_buff *skb, char *dst, char *src) -{ - struct net_device *dev = skb->dev; - - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) - dst = NULL; - - if (dev_hard_header(skb, dev, ETH_P_DNA_RT, dst, src, skb->len) >= 0) - dn_rt_send(skb); - else - kfree_skb(skb); -} - -#endif /* _NET_DN_ROUTE_H */ diff --git a/include/net/dropreason.h b/include/net/dropreason.h new file mode 100644 index 000000000000..c1cbcdbaf149 --- /dev/null +++ b/include/net/dropreason.h @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_H +#define _LINUX_DROPREASON_H + +#define DEFINE_DROP_REASON(FN, FNe) \ + FN(NOT_SPECIFIED) \ + FN(NO_SOCKET) \ + FN(PKT_TOO_SMALL) \ + FN(TCP_CSUM) \ + FN(SOCKET_FILTER) \ + FN(UDP_CSUM) \ + FN(NETFILTER_DROP) \ + FN(OTHERHOST) \ + FN(IP_CSUM) \ + FN(IP_INHDR) \ + FN(IP_RPFILTER) \ + FN(UNICAST_IN_L2_MULTICAST) \ + FN(XFRM_POLICY) \ + FN(IP_NOPROTO) \ + FN(SOCKET_RCVBUFF) \ + FN(PROTO_MEM) \ + FN(TCP_MD5NOTFOUND) \ + FN(TCP_MD5UNEXPECTED) \ + FN(TCP_MD5FAILURE) \ + FN(SOCKET_BACKLOG) \ + FN(TCP_FLAGS) \ + FN(TCP_ZEROWINDOW) \ + FN(TCP_OLD_DATA) \ + FN(TCP_OVERWINDOW) \ + FN(TCP_OFOMERGE) \ + FN(TCP_RFC7323_PAWS) \ + 
FN(TCP_INVALID_SEQUENCE) \ + FN(TCP_RESET) \ + FN(TCP_INVALID_SYN) \ + FN(TCP_CLOSE) \ + FN(TCP_FASTOPEN) \ + FN(TCP_OLD_ACK) \ + FN(TCP_TOO_OLD_ACK) \ + FN(TCP_ACK_UNSENT_DATA) \ + FN(TCP_OFO_QUEUE_PRUNE) \ + FN(TCP_OFO_DROP) \ + FN(IP_OUTNOROUTES) \ + FN(BPF_CGROUP_EGRESS) \ + FN(IPV6DISABLED) \ + FN(NEIGH_CREATEFAIL) \ + FN(NEIGH_FAILED) \ + FN(NEIGH_QUEUEFULL) \ + FN(NEIGH_DEAD) \ + FN(TC_EGRESS) \ + FN(QDISC_DROP) \ + FN(CPU_BACKLOG) \ + FN(XDP) \ + FN(TC_INGRESS) \ + FN(UNHANDLED_PROTO) \ + FN(SKB_CSUM) \ + FN(SKB_GSO_SEG) \ + FN(SKB_UCOPY_FAULT) \ + FN(DEV_HDR) \ + FN(DEV_READY) \ + FN(FULL_RING) \ + FN(NOMEM) \ + FN(HDR_TRUNC) \ + FN(TAP_FILTER) \ + FN(TAP_TXFILTER) \ + FN(ICMP_CSUM) \ + FN(INVALID_PROTO) \ + FN(IP_INADDRERRORS) \ + FN(IP_INNOROUTES) \ + FN(PKT_TOO_BIG) \ + FNe(MAX) + +/** + * enum skb_drop_reason - the reasons of skb drops + * + * The reason of skb drop, which is used in kfree_skb_reason(). + */ +enum skb_drop_reason { + /** + * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) + */ + SKB_NOT_DROPPED_YET = 0, + /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ + SKB_DROP_REASON_NOT_SPECIFIED, + /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ + SKB_DROP_REASON_PKT_TOO_SMALL, + /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ + SKB_DROP_REASON_TCP_CSUM, + /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ + SKB_DROP_REASON_SOCKET_FILTER, + /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */ + SKB_DROP_REASON_UDP_CSUM, + /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */ + SKB_DROP_REASON_NETFILTER_DROP, + /** + * @SKB_DROP_REASON_OTHERHOST: packet don't belong to current host + * (interface is in promisc mode) + */ + SKB_DROP_REASON_OTHERHOST, + /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */ + SKB_DROP_REASON_IP_CSUM, + /** + * @SKB_DROP_REASON_IP_INHDR: there is something 
wrong with IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_INHDR, + /** + * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the + * document for rp_filter in ip-sysctl.rst for more information + */ + SKB_DROP_REASON_IP_RPFILTER, + /** + * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is + * multicast, but L3 is unicast. + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, + /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */ + SKB_DROP_REASON_XFRM_POLICY, + /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */ + SKB_DROP_REASON_IP_NOPROTO, + /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ + SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet + * drop out of udp_memory_allocated. + */ + SKB_DROP_REASON_PROTO_MEM, + /** + * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, + * corresponding to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, + /** + * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting + * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and its wrong, corresponding + * to LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_TCP_MD5FAILURE, + /** + * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( + * see LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_SOCKET_BACKLOG, + /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ + SKB_DROP_REASON_TCP_FLAGS, + /** + * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_ZEROWINDOW, + /** + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already + * received before (spurious retrans may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OLD_DATA, + /** + * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP 
data is out of window, + * the seq of the first byte exceed the right edges of receive + * window + */ + SKB_DROP_REASON_TCP_OVERWINDOW, + /** + * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo + * queue, corresponding to LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_OFOMERGE, + /** + * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ + SKB_DROP_REASON_TCP_RESET, + /** + * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected + * SYN flag + */ + SKB_DROP_REASON_TCP_INVALID_SYN, + /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_CLOSE, + /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_FASTOPEN, + /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_OLD_ACK, + /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, + /** + * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't + * sent yet + */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, + /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, + /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */ + SKB_DROP_REASON_TCP_OFO_DROP, + /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */ + SKB_DROP_REASON_IP_OUTNOROUTES, + /** + * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, + /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */ + SKB_DROP_REASON_IPV6DISABLED, + /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, + /** 
@SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_FAILED, + /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, + /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */ + SKB_DROP_REASON_NEIGH_DEAD, + /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ + SKB_DROP_REASON_TC_EGRESS, + /** + * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( + * failed to enqueue to current qdisc) + */ + SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU + * backlog queue. This can be caused by backlog queue full (see + * netdev_max_backlog in net.rst) or RPS flow limit + */ + SKB_DROP_REASON_CPU_BACKLOG, + /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ + SKB_DROP_REASON_XDP, + /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ + SKB_DROP_REASON_TC_INGRESS, + /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */ + SKB_DROP_REASON_UNHANDLED_PROTO, + /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */ + SKB_DROP_REASON_SKB_CSUM, + /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ + SKB_DROP_REASON_SKB_GSO_SEG, + /** + * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, + * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, + /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */ + SKB_DROP_REASON_DEV_HDR, + /** + * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to + * any of its data structure that is not up/ready/initialized, + * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq] + * is not initialized + */ + SKB_DROP_REASON_DEV_READY, + /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */ + SKB_DROP_REASON_FULL_RING, + /** @SKB_DROP_REASON_NOMEM: error due to OOM */ + SKB_DROP_REASON_NOMEM, + 
/** + * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from + * networking data, e.g., failed to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_HDR_TRUNC, + /** + * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached + * to tun/tap, e.g., via TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_FILTER, + /** + * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at + * tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_TAP_TXFILTER, + /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */ + SKB_DROP_REASON_ICMP_CSUM, + /** + * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211, + * such as a broadcasts ICMP_TIMESTAMP + */ + SKB_DROP_REASON_INVALID_PROTO, + /** + * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INADDRERRORS, + /** + * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, + /** + * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the + * MTU) + */ + SKB_DROP_REASON_PKT_TOO_BIG, + /** + * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be + * used as a real 'reason' + */ + SKB_DROP_REASON_MAX, +}; + +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + +extern const char * const drop_reasons[]; + +#endif diff --git a/include/net/dsa.h b/include/net/dsa.h index eff5c44ba377..ee369670e20e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -52,6 +52,9 @@ struct phylink_link_state; #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 
#define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 +#define DSA_TAG_PROTO_RTL8_4T_VALUE 25 +#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 +#define DSA_TAG_PROTO_LAN937X_VALUE 27 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -79,6 +82,9 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, + DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, + DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, + DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, }; struct dsa_switch; @@ -88,6 +94,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + int (*connect)(struct dsa_switch *ds); + void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -114,9 +122,20 @@ struct dsa_netdevice_ops { #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) +struct dsa_lag { + struct net_device *dev; + unsigned int id; + struct mutex fdb_lock; + struct list_head fdbs; + refcount_t refcount; +}; + struct dsa_switch_tree { struct list_head list; + /* List of switch ports */ + struct list_head ports; + /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; @@ -126,8 +145,10 @@ struct dsa_switch_tree { /* Number of switches attached to this tree */ struct kref refcount; - /* Has this tree been applied to the hardware? */ - bool setup; + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. 
+ */ + struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -137,54 +158,55 @@ struct dsa_switch_tree { */ enum dsa_tag_protocol default_proto; + /* Has this tree been applied to the hardware? */ + bool setup; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. */ struct dsa_platform_data *pd; - /* List of switch ports */ - struct list_head ports; - /* List of DSA links composing the routing table */ struct list_head rtable; - /* Maps offloaded LAG netdevs to a zero-based linear ID for - * drivers that need it. - */ - struct net_device **lags; + /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ unsigned int last_switch; }; +/* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ - for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ - if ((_dst)->lags[(_id)]) + for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ - if ((_dp)->lag_dev == (_lag)) + if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) -static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, - unsigned int id) +static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, + unsigned int id) { - return dst->lags[id]; + /* DSA LAG IDs are one-based, dst->lags is zero-based */ + return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, - struct net_device *lag) + struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { - if (dsa_lag_dev(dst, id) == lag) - return id; + struct dsa_lag *lag = dsa_lag_by_id(dst, id); + + if (lag->dev == lag_dev) + return lag->id; } return -ENODEV; 
@@ -219,6 +241,12 @@ struct dsa_mall_tc_entry { }; }; +struct dsa_bridge { + struct net_device *dev; + unsigned int num; + bool tx_fwd_offload; + refcount_t refcount; +}; struct dsa_port { /* A CPU port is physically connected to a master device. @@ -238,6 +266,10 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); + struct dsa_switch *ds; + + unsigned int index; + enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, @@ -245,36 +277,45 @@ struct dsa_port { DSA_PORT_TYPE_USER, } type; - struct dsa_switch *ds; - unsigned int index; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; + + u8 stp_state; + + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u8 vlan_filtering:1; + + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + u8 learning:1; + + u8 lag_tx_enabled:1; + + /* Master state bits, valid only on CPU ports */ + u8 master_admin_up:1; + u8 master_oper_up:1; + + /* Valid only on user ports */ + u8 cpu_port_in_lag:1; + + u8 setup:1; + struct device_node *dn; unsigned int ageing_time; - bool vlan_filtering; - /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ - bool learning; - u8 stp_state; - struct net_device *bridge_dev; - int bridge_num; + + struct dsa_bridge *bridge; struct devlink_port devlink_port; - bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; - struct net_device *lag_dev; - bool lag_tx_enabled; + struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; /* - * Give the switch driver somewhere to hang its per-port private data - * structures (accessible from the tagger). 
- */ - void *priv; - - /* * Original copy of the master netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -291,7 +332,9 @@ struct dsa_port { struct list_head fdbs; struct list_head mdbs; - bool setup; + /* List of VLANs that CPU and DSA ports are members of. */ + struct mutex vlans_lock; + struct list_head vlans; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -305,16 +348,37 @@ struct dsa_link { struct list_head list; }; +enum dsa_db_type { + DSA_DB_PORT, + DSA_DB_LAG, + DSA_DB_BRIDGE, +}; + +struct dsa_db { + enum dsa_db_type type; + + union { + const struct dsa_port *dp; + struct dsa_lag lag; + struct dsa_bridge bridge; + }; +}; + struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; u16 vid; refcount_t refcount; struct list_head list; + struct dsa_db db; }; -struct dsa_switch { - bool setup; +struct dsa_vlan { + u16 vid; + refcount_t refcount; + struct list_head list; +}; +struct dsa_switch { struct device *dev; /* @@ -323,6 +387,59 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u32 setup:1; + + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + u32 vlan_filtering_is_global:1; + + /* Keep VLAN filtering enabled on ports not offloading any upper */ + u32 needs_standalone_vlan_filtering:1; + + /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges + * that have vlan_filtering=0. All drivers should ideally set this (and + * then the option would get removed), but it is unknown whether this + * would break things or not. 
+ */ + u32 configure_vlan_while_not_filtering:1; + + /* If the switch driver always programs the CPU port as egress tagged + * despite the VLAN configuration indicating otherwise, then setting + * @untag_bridge_pvid will force the DSA receive path to pop the + * bridge's default_pvid VLAN tagged frames to offer a consistent + * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge + * device. + */ + u32 untag_bridge_pvid:1; + + /* Let DSA manage the FDB entries towards the + * CPU, based on the software bridge database. + */ + u32 assisted_learning_on_cpu_port:1; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + u32 vlan_filtering:1; + + /* For switches that only have the MRU configurable. To ensure the + * configured MTU is not exceeded, normalization of MRU on all bridged + * interfaces is needed. + */ + u32 mtu_enforcement_ingress:1; + + /* Drivers that isolate the FDBs of multiple bridges must set this + * to true to receive the bridge as an argument in .port_fdb_{add,del} + * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be + * passed as zero. + */ + u32 fdb_isolation:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -332,6 +449,8 @@ struct dsa_switch { */ void *priv; + void *tagger_data; + /* * Configuration data for this switch. */ @@ -361,50 +480,6 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; - /* Disallow bridge core from requesting different VLAN awareness - * settings on ports if not hardware-supported - */ - bool vlan_filtering_is_global; - - /* Keep VLAN filtering enabled on ports not offloading any upper. */ - bool needs_standalone_vlan_filtering; - - /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges - * that have vlan_filtering=0. 
All drivers should ideally set this (and - * then the option would get removed), but it is unknown whether this - * would break things or not. - */ - bool configure_vlan_while_not_filtering; - - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the bridge's - * default_pvid VLAN tagged frames to offer a consistent behavior - * between a vlan_filtering=0 and vlan_filtering=1 bridge device. - */ - bool untag_bridge_pvid; - - /* Let DSA manage the FDB entries towards the CPU, based on the - * software bridge database. - */ - bool assisted_learning_on_cpu_port; - - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings. - */ - bool vlan_filtering; - - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - bool pcs_poll; - - /* For switches that only have the MRU configurable. To ensure the - * configured MTU is not exceeded, normalization of MRU on all bridged - * interfaces is needed. - */ - bool mtu_enforcement_ingress; - /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at @@ -413,14 +488,14 @@ struct dsa_switch { */ unsigned int num_lag_ids; - /* Drivers that support bridge forwarding offload should set this to - * the maximum number of bridges spanning the same switch tree (or all - * trees, in the case of cross-tree bridging support) that can be - * offloaded. + /* Drivers that support bridge forwarding offload or FDB isolation + * should set this to the maximum number of bridges spanning the same + * switch tree (or all trees, in the case of cross-tree bridging + * support) that can be offloaded. 
*/ - unsigned int num_fwd_offloading_bridges; + unsigned int max_num_bridges; - size_t num_ports; + unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) @@ -455,6 +530,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } +static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +{ + return dsa_port_is_cpu(dp) && dp->master_admin_up && + dp->master_oper_up; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; @@ -479,6 +560,14 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) +#define dsa_tree_for_each_user_port_continue_reverse(_dp, _dst) \ + list_for_each_entry_continue_reverse((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_user((_dp))) + +#define dsa_tree_for_each_cpu_port(_dp, _dst) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_cpu((_dp))) + #define dsa_switch_for_each_port(_dp, _ds) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) @@ -503,6 +592,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) +#define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \ + dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ + if (dsa_port_is_cpu((_dp))) + static inline u32 dsa_user_ports(struct dsa_switch *ds) { struct dsa_port *dp; @@ -514,6 +607,17 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } +static inline u32 dsa_cpu_ports(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + u32 mask = 0; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) + mask |= BIT(cpu_dp->index); + + return mask; +} + /* Return the local port used to reach an arbitrary switch device */ static inline unsigned int 
dsa_routing_port(struct dsa_switch *ds, int device) { @@ -558,6 +662,24 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) return port == dsa_upstream_port(ds, port); } +/* Return true if this is a DSA port leading away from the CPU */ +static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) +{ + return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); +} + +/* Return the local port used to reach the CPU port */ +static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) +{ + struct dsa_port *dp; + + dsa_switch_for_each_available_port(dp, ds) { + return dsa_upstream_port(ds, dp->index); + } + + return ds->num_ports; +} + /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. @@ -585,20 +707,120 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) return dp->vlan_filtering; } +static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) +{ + return dp->lag ? dp->lag->id : 0; +} + +static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) +{ + return dp->lag ? 
dp->lag->dev : NULL; +} + +static inline bool dsa_port_offloads_lag(struct dsa_port *dp, + const struct dsa_lag *lag) +{ + return dsa_port_lag_dev_get(dp) == lag->dev; +} + +static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) +{ + if (dp->cpu_port_in_lag) + return dsa_port_lag_dev_get(dp->cpu_dp); + + return dp->cpu_dp->master; +} + static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { - if (!dp->bridge_dev) + if (!dp->bridge) return NULL; - if (dp->lag_dev) - return dp->lag_dev; + if (dp->lag) + return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; return dp->slave; } +static inline struct net_device * +dsa_port_bridge_dev_get(const struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->dev : NULL; +} + +static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->num : 0; +} + +static inline bool dsa_port_bridge_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + struct net_device *br_a = dsa_port_bridge_dev_get(a); + struct net_device *br_b = dsa_port_bridge_dev_get(b); + + /* Standalone ports are not in the same bridge with one another */ + return (!br_a || !br_b) ? false : (br_a == br_b); +} + +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + const struct net_device *dev) +{ + return dsa_port_to_bridge_port(dp) == dev; +} + +static inline bool +dsa_port_offloads_bridge_dev(struct dsa_port *dp, + const struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. 
+ */ + return dsa_port_bridge_dev_get(dp) == bridge_dev; +} + +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + const struct dsa_bridge *bridge) +{ + return dsa_port_bridge_dev_get(dp) == bridge->dev; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + const struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_port(dp, dev)) + return true; + + return false; +} + +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool +dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) + return true; + + return false; +} + +static inline bool dsa_port_tree_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + return a->ds->dst == b->ds->dst; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { @@ -612,8 +834,19 @@ struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); - int (*change_tag_protocol)(struct dsa_switch *ds, int port, + int (*change_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); + /* + * Method for switch drivers to connect to the tagging protocol driver + * in current use. The switch driver can provide handlers for certain + * types of packets for switch management. 
+ */ + int (*connect_tag_protocol)(struct dsa_switch *ds, + enum dsa_tag_protocol proto); + + int (*port_change_master)(struct dsa_switch *ds, int port, + struct net_device *master, + struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -645,11 +878,14 @@ struct dsa_switch_ops { /* * PHYLINK integration */ - void (*phylink_get_interfaces)(struct dsa_switch *ds, int port, - unsigned long *supported_interfaces); + void (*phylink_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); + struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, + int port, + phy_interface_t iface); int (*phylink_mac_link_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); void (*phylink_mac_config)(struct dsa_switch *ds, int port, @@ -683,8 +919,13 @@ struct dsa_switch_ops { struct ethtool_eth_mac_stats *mac_stats); void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats); + void (*get_rmon_stats)(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); + void (*get_pause_stats)(struct dsa_switch *ds, int port, + struct ethtool_pause_stats *pause_stats); void (*self_test)(struct dsa_switch *ds, int port, struct ethtool_test *etest, u64 *data); @@ -703,6 +944,18 @@ struct dsa_switch_ops { struct ethtool_ts_info *ts); /* + * DCB ops + */ + int (*port_get_default_prio)(struct dsa_switch *ds, int port); + int (*port_set_default_prio)(struct dsa_switch *ds, int port, + u8 prio); + int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); + int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + int 
(*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + + /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); @@ -748,26 +1001,25 @@ struct dsa_switch_ops { */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, - struct net_device *bridge); + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, - struct net_device *bridge); - /* Called right after .port_bridge_join() */ - int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); - /* Called right before .port_bridge_leave() */ - void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); + struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + int (*port_mst_state_set)(struct dsa_switch *ds, int port, + const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); + void (*port_set_host_flood)(struct dsa_switch *ds, int port, + bool uc, bool mc); /* * VLAN support @@ -780,23 +1032,36 @@ struct dsa_switch_ops { struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); + int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge, + const struct switchdev_vlan_msti *msti); + /* * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, 
u16 vid, + struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); + int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); + int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, + const unsigned char *addr, u16 vid, + struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); /* * RXNFC */ @@ -816,7 +1081,7 @@ struct dsa_switch_ops { struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress); + bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, @@ -830,17 +1095,19 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge, + struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag, - struct netdev_lag_upper_info *info); + int port, struct dsa_lag lag, + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); int 
(*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, - int port, struct net_device *lag); + int port, struct dsa_lag lag); /* * PTP functionality @@ -912,10 +1179,11 @@ struct dsa_switch_ops { */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, - struct net_device *lag, - struct netdev_lag_upper_info *info); + struct dsa_lag lag, + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack); int (*port_lag_leave)(struct dsa_switch *ds, int port, - struct net_device *lag); + struct dsa_lag lag); /* * HSR integration @@ -943,6 +1211,13 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + + /* + * DSA master tracking operations + */ + void (*master_state_change)(struct dsa_switch *ds, + const struct net_device *master, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -1019,6 +1294,13 @@ struct dsa_switch_driver { struct net_device *dsa_dev_to_net_device(struct device *dev); +bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db); +bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); + /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { @@ -1094,6 +1376,7 @@ void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); +void dsa_flush_workqueue(void); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); @@ -1118,9 +1401,6 @@ static inline bool dsa_slave_dev_check(const struct net_device *dev) #endif netdev_tx_t 
dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); -int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); -int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); -int dsa_port_get_phy_sset_count(struct dsa_port *dp); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); struct dsa_tag_driver { @@ -1153,7 +1433,7 @@ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers - * @__ops_array: Array of tag driver strucutres + * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and diff --git a/include/net/dst.h b/include/net/dst.h index a057319aabef..00b479ce6b99 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #ifndef CONFIG_64BIT atomic_t __refcnt; /* 32-bit offset 64 */ #endif + netdevice_tracker dev_tracker; }; struct dst_metrics { @@ -238,12 +239,6 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) } } -static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time) -{ - dst_hold(dst); - dst_use_noref(dst, time); -} - static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75d..a454cf4327fe 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -4,11 +4,14 @@ #include <linux/skbuff.h> #include <net/ip_tunnels.h> +#include <net/macsec.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, + METADATA_MACSEC, + METADATA_XFRM, }; struct hw_port_info { @@ -16,12 +19,23 @@ struct hw_port_info { u32 port_id; }; +struct macsec_info { + sci_t sci; +}; + +struct xfrm_md_info { + u32 if_id; + int link; +}; + struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { 
struct ip_tunnel_info tun_info; struct hw_port_info port_info; + struct macsec_info macsec_info; + struct xfrm_md_info xfrm_info; } u; }; @@ -53,6 +67,27 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) +{ + return (struct xfrm_md_info *)lwt->data; +} + +static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dst_entry *dst; + + if (md_dst && md_dst->type == METADATA_XFRM) + return &md_dst->u.xfrm_info; + + dst = skb_dst(skb); + if (dst && dst->lwtstate && + dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) + return lwt_xfrm_info(dst->lwtstate); + + return NULL; +} + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -82,6 +117,12 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); + case METADATA_MACSEC: + return memcmp(&a->u.macsec_info, &b->u.macsec_info, + sizeof(a->u.macsec_info)); + case METADATA_XFRM: + return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, + sizeof(a->u.xfrm_info)); default: return 1; } @@ -123,8 +164,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } diff --git a/include/net/erspan.h b/include/net/erspan.h index 0d9e86bd9893..6cb4cbd6a48f 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -58,6 +58,9 @@ * GRE proto 
ERSPAN type I/II = 0x88BE, type III = 0x22EB */ +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> #include <uapi/linux/erspan.h> #define ERSPAN_VERSION 0x1 /* ERSPAN type II */ diff --git a/include/net/esp.h b/include/net/esp.h index 9c5637d41d95..322950727dd0 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -5,6 +5,7 @@ #include <linux/skbuff.h> struct ip_esp_hdr; +struct xfrm_state; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) { diff --git a/include/net/ethoc.h b/include/net/ethoc.h index 78519ed42ab4..73810f3ca492 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -10,6 +10,9 @@ #ifndef LINUX_NET_ETHOC_H #define LINUX_NET_ETHOC_H 1 +#include <linux/if.h> +#include <linux/types.h> + struct ethoc_platform_data { u8 hwaddr[IFHWADDRLEN]; s8 phy_id; diff --git a/include/net/failover.h b/include/net/failover.h index bb15438f39c7..f2b42b4b9cd6 100644 --- a/include/net/failover.h +++ b/include/net/failover.h @@ -25,6 +25,7 @@ struct failover_ops { struct failover { struct list_head list; struct net_device __rcu *failover_dev; + netdevice_tracker dev_tracker; struct failover_ops __rcu *ops; }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bd07484ab9dd..82da359bca03 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -91,7 +91,6 @@ struct fib_rules_ops { void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; - const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; @@ -103,26 +102,6 @@ struct fib_rule_notifier_info { struct fib_rule *rule; }; -#define FRA_GENERIC_POLICY \ - [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ - [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_PRIORITY] = { .type = NLA_U32 }, \ - [FRA_FWMARK] = { .type = NLA_U32 }, \ - [FRA_TUN_ID] = { .type = NLA_U64 }, \ - [FRA_FWMASK] = { .type 
= NLA_U32 }, \ - [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 }, \ - [FRA_IP_PROTO] = { .type = NLA_U8 }, \ - [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } - - static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); diff --git a/include/net/firewire.h b/include/net/firewire.h index 299e5df38552..8fbff8d77865 100644 --- a/include/net/firewire.h +++ b/include/net/firewire.h @@ -2,6 +2,8 @@ #ifndef _NET_FIREWIRE_H #define _NET_FIREWIRE_H +#include <linux/types.h> + /* Pseudo L2 address */ #define FWNET_ALEN 16 union fwnet_hwaddr { @@ -11,8 +13,7 @@ union fwnet_hwaddr { __be64 uniq_id; /* EUI-64 */ u8 max_rec; /* max packet size */ u8 sspd; /* max speed */ - __be16 fifo_hi; /* hi 16bits of FIFO addr */ - __be32 fifo_lo; /* lo 32bits of FIFO addr */ + u8 fifo[6]; /* FIFO addr */ } __packed uc; }; diff --git a/include/net/flow.h b/include/net/flow.h index 58beb16a49b8..2f0da4f0318b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -29,6 +29,7 @@ struct flowi_tunnel { struct flowi_common { int flowic_oif; int flowic_iif; + int flowic_l3mdev; __u32 flowic_mark; __u8 flowic_tos; __u8 flowic_scope; @@ -36,7 +37,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; struct flowi_tunnel flowic_tun_key; @@ -54,11 +54,6 @@ union flowi_uli { __u8 code; } icmpt; - struct { - __le16 dport; - __le16 sport; - } dnports; - __be32 gre_key; struct { @@ -70,6 +65,7 @@ struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif 
#define flowi4_iif __fl_common.flowic_iif +#define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_tos __fl_common.flowic_tos #define flowi4_scope __fl_common.flowic_scope @@ -102,6 +98,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; @@ -132,6 +129,7 @@ struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif +#define flowi6_l3mdev __fl_common.flowic_l3mdev #define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto @@ -153,30 +151,15 @@ struct flowi6 { __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); -struct flowidn { - struct flowi_common __fl_common; -#define flowidn_oif __fl_common.flowic_oif -#define flowidn_iif __fl_common.flowic_iif -#define flowidn_mark __fl_common.flowic_mark -#define flowidn_scope __fl_common.flowic_scope -#define flowidn_proto __fl_common.flowic_proto -#define flowidn_flags __fl_common.flowic_flags - __le16 daddr; - __le16 saddr; - union flowi_uli uli; -#define fld_sport uli.ports.sport -#define fld_dport uli.ports.dport -} __attribute__((__aligned__(BITS_PER_LONG/8))); - struct flowi { union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; - struct flowidn dn; } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif +#define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_tos u.__fl_common.flowic_tos #define flowi_scope u.__fl_common.flowic_scope @@ -207,11 +190,6 @@ static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) return &(fl6->__fl_common); } -static inline struct flowi *flowidn_to_flowi(struct flowidn 
*fldn) -{ - return container_of(fldn, struct flowi, u.dn); -} - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); #endif diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index aa33e1092e2c..5ccf52ef8809 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { __be16 vlan_tci; }; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_mpls_lse { @@ -177,6 +179,22 @@ struct flow_dissector_key_ports { }; /** + * struct flow_dissector_key_ports_range + * @tp: port number from packet + * @tp_min: min port number in range + * @tp_max: max port number in range + */ +struct flow_dissector_key_ports_range { + union { + struct flow_dissector_key_ports tp; + struct { + struct flow_dissector_key_ports tp_min; + struct flow_dissector_key_ports tp_max; + }; + }; +}; + +/** * flow_dissector_key_icmp: * type: ICMP type * code: ICMP code @@ -251,6 +269,34 @@ struct flow_dissector_key_hash { u32 hash; }; +/** + * struct flow_dissector_key_num_of_vlans: + * @num_of_vlans: num_of_vlans value + */ +struct flow_dissector_key_num_of_vlans { + u8 num_of_vlans; +}; + +/** + * struct flow_dissector_key_pppoe: + * @session_id: pppoe session id + * @ppp_proto: ppp protocol + * @type: pppoe eth type + */ +struct flow_dissector_key_pppoe { + __be16 session_id; + __be16 ppp_proto; + __be16 type; +}; + +/** + * struct flow_dissector_key_l2tpv3: + * @session_id: identifier for a l2tp session + */ +struct flow_dissector_key_l2tpv3 { + __be32 session_id; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -280,6 +326,9 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ + 
FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ + FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ + FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..e343f9f8363e 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -48,6 +48,10 @@ struct flow_match_ports { struct flow_dissector_key_ports *key, *mask; }; +struct flow_match_ports_range { + struct flow_dissector_key_ports_range *key, *mask; +}; + struct flow_match_icmp { struct flow_dissector_key_icmp *key, *mask; }; @@ -72,6 +76,14 @@ struct flow_match_ct { struct flow_dissector_key_ct *key, *mask; }; +struct flow_match_pppoe { + struct flow_dissector_key_pppoe *key, *mask; +}; + +struct flow_match_l2tpv3 { + struct flow_dissector_key_l2tpv3 *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -94,6 +106,8 @@ void flow_rule_match_ip(const struct flow_rule *rule, struct flow_match_ip *out); void flow_rule_match_ports(const struct flow_rule *rule, struct flow_match_ports *out); +void flow_rule_match_ports_range(const struct flow_rule *rule, + struct flow_match_ports_range *out); void flow_rule_match_tcp(const struct flow_rule *rule, struct flow_match_tcp *out); void flow_rule_match_icmp(const struct flow_rule *rule, @@ -116,6 +130,10 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); void flow_rule_match_ct(const struct flow_rule *rule, struct flow_match_ct *out); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out); +void flow_rule_match_l2tpv3(const struct flow_rule *rule, + struct flow_match_l2tpv3 *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, @@ -148,6 +166,11 @@ enum flow_action_id { FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, FLOW_ACTION_PPPOE_PUSH, + FLOW_ACTION_JUMP, + 
FLOW_ACTION_PIPE, + FLOW_ACTION_VLAN_PUSH_ETH, + FLOW_ACTION_VLAN_POP_ETH, + FLOW_ACTION_CONTINUE, NUM_FLOW_ACTIONS, }; @@ -197,6 +220,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; + u32 hw_index; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -208,6 +232,10 @@ struct flow_action_entry { __be16 proto; u8 prio; } vlan; + struct { /* FLOW_ACTION_VLAN_PUSH_ETH */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; + } vlan_push_eth; struct { /* FLOW_ACTION_MANGLE */ /* FLOW_ACTION_ADD */ enum flow_action_mangle_base htype; @@ -232,12 +260,18 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ - u32 index; u32 burst; u64 rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; u64 burst_pkt; u64 rate_pkt_ps; u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; } police; struct { /* FLOW_ACTION_CT */ int action; @@ -267,7 +301,6 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; struct { - u32 index; s32 prio; u64 basetime; u64 cycletime; @@ -303,6 +336,12 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) return action->num_entries == 1; } +static inline bool flow_action_is_last_entry(const struct flow_action *action, + const struct flow_action_entry *entry) +{ + return entry == &action->entries[action->num_entries - 1]; +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; \ __i < (__actions)->num_entries; \ @@ -552,6 +591,23 @@ struct flow_cls_offload { u32 classid; }; +enum offload_act_command { + FLOW_ACT_REPLACE, + FLOW_ACT_DESTROY, + FLOW_ACT_STATS, +}; + +struct flow_offload_action { + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/ + enum offload_act_command command; + enum flow_action_id id; + u32 index; + struct flow_stats stats; + struct 
flow_action action; +}; + +struct flow_offload_action *offload_action_alloc(unsigned int num_actions); + static inline struct flow_rule * flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) { @@ -575,5 +631,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); +bool flow_indr_dev_exists(void); #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/fq.h b/include/net/fq.h index 2eccbbd2b559..07b5aff6ec58 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -7,6 +7,10 @@ #ifndef __NET_SCHED_FQ_H #define __NET_SCHED_FQ_H +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/types.h> + struct fq_tin; /** diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index a5f67a2c0c73..524b510f1c68 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -358,8 +358,7 @@ static int fq_init(struct fq *fq, int flows_cnt) if (!fq->flows) return -ENOMEM; - fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), - GFP_KERNEL); + fq->flows_bitmap = bitmap_zalloc(fq->flows_cnt, GFP_KERNEL); if (!fq->flows_bitmap) { kvfree(fq->flows); fq->flows = NULL; @@ -383,7 +382,7 @@ static void fq_reset(struct fq *fq, kvfree(fq->flows); fq->flows = NULL; - kfree(fq->flows_bitmap); + bitmap_free(fq->flows_bitmap); fq->flows_bitmap = NULL; } diff --git a/include/net/garp.h b/include/net/garp.h index 4d9a0c6a2e5f..59a07b171def 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -2,6 +2,8 @@ #ifndef _NET_GARP_H #define _NET_GARP_H +#include <linux/if_ether.h> +#include <linux/types.h> #include <net/stp.h> #define GARP_PROTOCOL_ID 0x1 diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 7cb3fa8310ed..9f97f73615b6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -11,6 +11,7 @@ /** * struct genl_multicast_group - generic netlink multicast group * @name: 
name of the multicast group, names are per-family + * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) */ struct genl_multicast_group { char name[GENL_NAMSIZ]; @@ -36,14 +37,25 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks + * @module: pointer to the owning module (set to THIS_MODULE) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups + * @resv_start_op: first operation for which reserved fields of the header + * can be validated and policies are required (see below); + * new families should leave this field at zero * @mcgrp_offset: starting number of multicast group IDs in this family * (private) * @ops: the operations supported by this family * @n_ops: number of operations supported by this family * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family + * + * Attribute policies (the combination of @policy and @maxattr fields) + * can be attached at the family level or at the operation level. + * If both are present the per-operation policy takes precedence. + * For operations before @resv_start_op lack of policy means that the core + * will perform no attribute parsing or validation. For newer operations + * if policy is not provided core will reject all TLV attributes. 
*/ struct genl_family { int id; /* private */ @@ -57,6 +69,7 @@ struct genl_family { u8 n_ops; u8 n_small_ops; u8 n_mcgrps; + u8 resv_start_op; const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, @@ -106,6 +119,13 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) +/* Report that a root attribute is missing */ +#define GENL_REQ_ATTR_CHECK(info, attr) ({ \ + struct genl_info *__info = (info); \ + \ + NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ +}) + enum genl_validate_flags { GENL_DONT_VALIDATE_STRICT = BIT(0), GENL_DONT_VALIDATE_DUMP = BIT(1), @@ -116,7 +136,7 @@ enum genl_validate_flags { * struct genl_small_ops - generic netlink operations (small version) * @cmd: command identifier * @internal_flags: flags used by the family - * @flags: flags + * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @dumpit: callback for dumpers @@ -137,7 +157,7 @@ struct genl_small_ops { * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family - * @flags: flags + * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @maxattr: maximum number of attributes supported * @policy: netlink policy (takes precedence over family policy) * @validate: validation flags from enum genl_validate_flags @@ -162,9 +182,9 @@ struct genl_ops { }; /** - * struct genl_info - info that is available during dumpit op call + * struct genl_dumpit_info - info that is available during dumpit op call * @family: generic netlink family - for internal genl code usage - * @ops: generic netlink ops - for internal genl code usage + * @op: generic netlink ops - for internal genl code usage * @attrs: netlink attributes */ struct genl_dumpit_info { @@ -343,6 +363,7 @@ int 
genlmsg_multicast_allns(const struct genl_family *family, /** * genlmsg_unicast - unicast a netlink message + * @net: network namespace to look up @portid in * @skb: netlink message as socket buffer * @portid: netlink portid of the destination socket */ @@ -362,7 +383,7 @@ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) } /** - * gennlmsg_data - head of message payload + * genlmsg_data - head of message payload * @gnlh: genetlink message header */ static inline void *genlmsg_data(const struct genlmsghdr *gnlh) diff --git a/include/net/gro.h b/include/net/gro.h index 01edaf3fdda0..a4fab706240d 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -4,9 +4,378 @@ #define _NET_IPV6_GRO_H #include <linux/indirect_call_wrapper.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/ip6_checksum.h> +#include <linux/skbuff.h> +#include <net/udp.h> -struct list_head; -struct sk_buff; +struct napi_gro_cb { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; + + /* Length of frag0. */ + unsigned int frag0_len; + + /* This indicates where we are processing relative to skb->data. */ + int data_offset; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; + + /* Number of segments aggregated. */ + u16 count; + + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; + + /* jiffies when first packet was created/queued */ + unsigned long age; + +/* Used in napi_gro_cb::free */ +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + /* portion of the cb set to zero at every gro iteration */ + struct_group(zeroed, + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; + + /* This is non-zero if the packet may be of the same flow. 
*/ + u8 same_flow:1; + + /* Used in tunnel GRO receive */ + u8 encap_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Free the skb? */ + u8 free:2; + + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; + + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + ); + + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + + /* used in skb_gro_receive() slow path */ + struct sk_buff *last; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) + +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); +static inline struct sk_buff *call_gro_receive(gro_receive_t cb, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, + struct sk_buff *); +static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + +static inline unsigned int skb_gro_offset(const struct sk_buff *skb) +{ + return NAPI_GRO_CB(skb)->data_offset; +} + +static inline unsigned int skb_gro_len(const struct sk_buff *skb) +{ + return skb->len - NAPI_GRO_CB(skb)->data_offset; +} + +static inline void 
skb_gro_pull(struct sk_buff *skb, unsigned int len) +{ + NAPI_GRO_CB(skb)->data_offset += len; +} + +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) +{ + return NAPI_GRO_CB(skb)->frag0 + offset; +} + +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} + +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + if (!pskb_may_pull(skb, hlen)) + return NULL; + + skb_gro_frag0_invalidate(skb); + return skb->data + offset; +} + +static inline void *skb_gro_header(struct sk_buff *skb, + unsigned int hlen, unsigned int offset) +{ + void *ptr; + + ptr = skb_gro_header_fast(skb, offset); + if (skb_gro_header_hard(skb, hlen)) + ptr = skb_gro_header_slow(skb, hlen, offset); + return ptr; +} + +static inline void *skb_gro_network_header(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + + skb_network_offset(skb); +} + +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (NAPI_GRO_CB(skb)->csum_valid) + NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, + wsum_negate(NAPI_GRO_CB(skb)->csum))); +} + +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. 
+ */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); +} + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return ((skb->ip_summed != CHECKSUM_PARTIAL || + skb_checksum_start_offset(skb) < + skb_gro_offset(skb)) && + !skb_at_gro_remcsum_start(skb) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. 
+ */ + __skb_incr_checksum_unnecessary(skb); + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, \ + compute_pseudo(skb, proto)); \ +} while (0) + +struct gro_remcsum { + int offset; + __wsum delta; +}; + +static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) +{ + grc->offset = 0; + grc->delta = 0; +} + +static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + unsigned int off, size_t hdrlen, + int start, int offset, + struct gro_remcsum *grc, + bool nopartial) +{ + __wsum delta; + size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + if (!nopartial) { + NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; + return ptr; + } + + ptr = skb_gro_header(skb, off + plen, off); + if (!ptr) + return 
NULL; + + delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, + start, offset); + + /* Adjust skb->csum since we changed the packet */ + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + grc->offset = off + hdrlen + offset; + grc->delta = delta; + + return ptr; +} + +static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, + struct gro_remcsum *grc) +{ + void *ptr; + size_t plen = grc->offset + sizeof(u16); + + if (!grc->delta) + return; + + ptr = skb_gro_header(skb, plen, grc->offset); + if (!ptr) + return; + + remcsum_unadjust((__sum16 *)ptr, grc->delta); +} + +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} +#endif INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); @@ -15,6 +384,14 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); + +INDIRECT_CALLABLE_DECLARE(struct sk_buff 
*udp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); + #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ @@ -22,4 +399,52 @@ INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ }) +struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct udphdr *uh, struct sock *sk); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header(skb, hlen, off); + + return uh; +} + +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); + +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static inline void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
+ */ +static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +{ + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; + if (napi->rx_count >= READ_ONCE(gro_normal_batch)) + gro_normal_list(napi); +} + + #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/gtp.h b/include/net/gtp.h index 0e16ebb2a82d..2a503f035d18 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -2,13 +2,22 @@ #ifndef _GTP_H_ #define _GTP_H_ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <net/rtnetlink.h> + /* General GTP protocol related definitions. */ #define GTP0_PORT 3386 #define GTP1U_PORT 2152 +/* GTP messages types */ +#define GTP_ECHO_REQ 1 /* Echo Request */ +#define GTP_ECHO_RSP 2 /* Echo Response */ #define GTP_TPDU 255 +#define GTPIE_RECOVERY 14 + struct gtp0_header { /* According to GSM TS 09.60. */ __u8 flags; __u8 type; @@ -27,6 +36,43 @@ struct gtp1_header { /* According to 3GPP TS 29.060. */ __be32 tid; } __attribute__ ((packed)); +struct gtp1_header_long { /* According to 3GPP TS 29.060. */ + __u8 flags; + __u8 type; + __be16 length; + __be32 tid; + __be16 seq; + __u8 npdu; + __u8 next; +} __packed; + +/* GTP Information Element */ +struct gtp_ie { + __u8 tag; + __u8 val; +} __packed; + +struct gtp0_packet { + struct gtp0_header gtp0_h; + struct gtp_ie ie; +} __packed; + +struct gtp1u_packet { + struct gtp1_header_long gtp1u_h; + struct gtp_ie ie; +} __packed; + +struct gtp_pdu_session_info { /* According to 3GPP TS 38.415. */ + u8 pdu_type; + u8 qfi; +}; + +static inline bool netif_is_gtp(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gtp"); +} + #define GTP1_F_NPDU 0x01 #define GTP1_F_SEQ 0x02 #define GTP1_F_EXTHDR 0x04 diff --git a/include/net/gue.h b/include/net/gue.h index e42402f180b7..dfca298bec9c 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -30,6 +30,9 @@ * may refer to options placed after this field. 
*/ +#include <asm/byteorder.h> +#include <linux/types.h> + struct guehdr { union { struct { diff --git a/include/net/hwbm.h b/include/net/hwbm.h index c81444611a22..aa495decec35 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -2,6 +2,8 @@ #ifndef _HWBM_H #define _HWBM_H +#include <linux/mutex.h> + struct hwbm_pool { /* Capacity of the pool */ int size; diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 11630351c978..598f53d2a3a0 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019 Intel Corporation + * Copyright (c) 2018-2019, 2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -365,7 +365,7 @@ enum ieee80211_radiotap_zero_len_psdu_type { */ static inline u16 ieee80211_get_radiotap_len(const char *data) { - struct ieee80211_radiotap_header *hdr = (void *)data; + const struct ieee80211_radiotap_header *hdr = (const void *)data; return get_unaligned_le16(&hdr->it_len); } diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index d0d188c3294b..03b64bf876a4 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -15,6 +15,22 @@ #ifndef IEEE802154_NETDEVICE_H #define IEEE802154_NETDEVICE_H +#define IEEE802154_REQUIRED_SIZE(struct_type, member) \ + (offsetof(typeof(struct_type), member) + \ + sizeof(((typeof(struct_type) *)(NULL))->member)) + +#define IEEE802154_ADDR_OFFSET \ + offsetof(typeof(struct sockaddr_ieee802154), addr) + +#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \ + IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type)) + +#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \ + IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr)) + +#define IEEE802154_NAMELEN_LONG 
(IEEE802154_ADDR_OFFSET + \ + IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr)) + #include <net/af_ieee802154.h> #include <linux/netdevice.h> #include <linux/skbuff.h> @@ -165,6 +181,33 @@ static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr) memcpy(raw, &temp, IEEE802154_ADDR_LEN); } +static inline int +ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len) +{ + struct ieee802154_addr_sa *sa; + int ret = 0; + + sa = &daddr->addr; + if (len < IEEE802154_MIN_NAMELEN) + return -EINVAL; + switch (sa->addr_type) { + case IEEE802154_ADDR_NONE: + break; + case IEEE802154_ADDR_SHORT: + if (len < IEEE802154_NAMELEN_SHORT) + ret = -EINVAL; + break; + case IEEE802154_ADDR_LONG: + if (len < IEEE802154_NAMELEN_LONG) + ret = -EINVAL; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a, const struct ieee802154_addr_sa *sa) { diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 653e7d0f65cb..c8490729b4ae 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. 
+ */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; @@ -71,6 +79,8 @@ struct inet6_ifaddr { bool tokenized; + u8 ifa_proto; + struct rcu_head rcu; struct in6_addr peer_addr; }; @@ -160,6 +170,7 @@ struct ipv6_devstat { struct inet6_dev { struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head addr_list; diff --git a/include/net/ila.h b/include/net/ila.h index f98dcd5791b0..73ebe5eab272 100644 --- a/include/net/ila.h +++ b/include/net/ila.h @@ -8,6 +8,8 @@ #ifndef _NET_ILA_H #define _NET_ILA_H +struct sk_buff; + int ila_xlat_outgoing(struct sk_buff *skb); int ila_xlat_incoming(struct sk_buff *skb); diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 7392f959a405..025bd8d3c769 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -11,6 +11,8 @@ #include <linux/types.h> +struct flowi; +struct flowi6; struct request_sock; struct sk_buff; struct sock; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 81b965953036..56f1286583d3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -103,15 +103,24 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const int dif); int inet6_hash(struct sock *sk); -#endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) +static inline bool inet6_match(struct net *net, const struct sock *sk, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + const __portpair ports, + const int dif, const int sdif) +{ + if 
(!net_eq(sock_net(sk), net) || + sk->sk_family != AF_INET6 || + sk->sk_portpair != ports || + !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return false; + + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index cad2a611efde..cec453c18f1d 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -3,6 +3,10 @@ #define _INET_COMMON_H #include <linux/indirect_call_wrapper.h> +#include <linux/net.h> +#include <linux/netdev_features.h> +#include <linux/types.h> +#include <net/sock.h> extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -12,6 +16,8 @@ extern const struct proto_ops inet_dgram_ops; */ struct msghdr; +struct net; +struct page; struct sock; struct sockaddr; struct socket; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fa6a87246a7b..c2b15f7e5516 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -25,6 +25,7 @@ #undef INET_CSK_CLEAR_TIMERS struct inet_bind_bucket; +struct inet_bind2_bucket; struct tcp_congestion_ops; /* @@ -57,6 +58,7 @@ struct inet_connection_sock_af_ops { * * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node + * @icsk_bind2_hash: Bind node in the bhash2 table * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout @@ -66,7 +68,6 @@ struct inet_connection_sock_af_ops { * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data * @icsk_clean_acked Clean acked data hook - * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered 
[RTO] timeouts * @icsk_pending: Scheduled timer event @@ -84,6 +85,7 @@ struct inet_connection_sock { struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; + struct inet_bind2_bucket *icsk_bind2_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; @@ -96,7 +98,6 @@ struct inet_connection_sock { const struct tcp_ulp_ops *icsk_ulp_ops; void __rcu *icsk_ulp_data; void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); - struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:5, icsk_ca_initialized:1, @@ -285,6 +286,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); +static inline unsigned long +reqsk_timeout(struct request_sock *req, unsigned long max_timeout) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, max_timeout); +} + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ @@ -304,7 +313,7 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) (EPOLLIN | EPOLLRDNORM) : 0; } -int inet_csk_listen_start(struct sock *sk, int backlog); +int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); @@ -315,7 +324,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 3 +#define TCP_PINGPONG_THRESH 1 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -332,14 +341,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) 
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ack.pingpong < U8_MAX) - icsk->icsk_ack.pingpong++; -} - static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h new file mode 100644 index 000000000000..72f250dffada --- /dev/null +++ b/include/net/inet_dscp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP) + * + * DSCP is defined in RFC 2474: + * + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | DSCP | CU | + * +---+---+---+---+---+---+---+---+ + * + * DSCP: differentiated services codepoint + * CU: currently unused + * + * The whole DSCP + CU bits form the DS field. + * The DS field is also commonly called TOS or Traffic Class (for IPv6). + * + * Note: the CU bits are now used for Explicit Congestion Notification + * (RFC 3168). + */ + +#ifndef _INET_DSCP_H +#define _INET_DSCP_H + +#include <linux/types.h> + +/* Special type for storing DSCP values. + * + * A dscp_t variable stores a DS field with the CU (ECN) bits cleared. + * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding + * bugs where ECN bits are erroneously taken into account during FIB lookups + * or policy routing. + * + * Note: to get the real DSCP value contained in a dscp_t variable one would + * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have + * a helper for that, but there's currently no users. 
+ */ +typedef u8 __bitwise dscp_t; + +#define INET_DSCP_MASK 0xfc + +static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) +{ + return (__force dscp_t)(dsfield & INET_DSCP_MASK); +} + +static inline __u8 inet_dscp_to_dsfield(dscp_t dscp) +{ + return (__force __u8)dscp; +} + +static inline bool inet_validate_dscp(__u8 val) +{ + return !(val & ~INET_DSCP_MASK); +} + +#endif /* _INET_DSCP_H */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 48cc5795ceda..0b0876610553 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,6 +4,9 @@ #include <linux/rhashtable-types.h> #include <linux/completion.h> +#include <linux/in6.h> +#include <linux/rbtree_types.h> +#include <linux/refcount.h> /* Per netns frag queues directory */ struct fqdir { @@ -70,6 +73,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far + * @mono_delivery_time: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -90,6 +94,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; + u8 mono_delivery_time; __u8 flags; u16 max_size; struct fqdir *fqdir; @@ -117,8 +122,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). 
+ */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f72ec113ae56..3af1e927247d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -23,6 +23,7 @@ #include <net/inet_connection_sock.h> #include <net/inet_sock.h> +#include <net/ip.h> #include <net/sock.h> #include <net/route.h> #include <net/tcp_states.h> @@ -90,7 +91,31 @@ struct inet_bind_bucket { struct hlist_head owners; }; -static inline struct net *ib_net(struct inet_bind_bucket *ib) +struct inet_bind2_bucket { + possible_net_t ib_net; + int l3mdev; + unsigned short port; +#if IS_ENABLED(CONFIG_IPV6) + unsigned short family; +#endif + union { +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr v6_rcv_saddr; +#endif + __be32 rcv_saddr; + }; + /* Node in the bhash2 inet_bind_hashbucket chain */ + struct hlist_node node; + /* List of sockets hashed to this bucket */ + struct hlist_head owners; +}; + +static inline struct net *ib_net(const struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + +static inline struct net *ib2_net(const struct inet_bind2_bucket *ib) { return read_pnet(&ib->ib_net); } @@ -111,11 +136,7 @@ struct inet_bind_hashbucket { #define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - unsigned int count; - union { - struct hlist_head head; - struct hlist_nulls_head nulls_head; - }; + struct hlist_nulls_head nulls_head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -137,37 +158,32 @@ struct inet_hashinfo { * TCP hash as well as the others for fast bind/connect. */ struct kmem_cache *bind_bucket_cachep; + /* This bind table is hashed by local port */ struct inet_bind_hashbucket *bhash; + struct kmem_cache *bind2_bucket_cachep; + /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4) + * or sk->sk_v6_rcv_saddr (ipv6). 
This 2nd bind table is used + * primarily for expediting bind conflict resolution. + */ + struct inet_bind_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * might be often dirty. - */ - /* All sockets in TCP_LISTEN state will be in listening_hash. - * This is the only table where wildcard'd TCP sockets can - * exist. listening_hash is only hashed by local port number. - * If lhash2 is initialized, the same socket will also be hashed - * to lhash2 by port and address. - */ - struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] - ____cacheline_aligned_in_smp; + bool pernet; }; -#define inet_lhash2_for_each_icsk_continue(__icsk) \ - hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk(__icsk, list) \ - hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ - hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) +static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IP_DCCP) + return sk->sk_prot->h.hashinfo ? 
: + sock_net(sk)->ipv4.tcp_death_row.hashinfo; +#else + return sock_net(sk)->ipv4.tcp_death_row.hashinfo; +#endif +} static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) @@ -203,16 +219,9 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, - int dif, int sdif) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, - bound_dev_if, dif, sdif); -#else - return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -#endif -} +struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, + unsigned int ehash_entries); +void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo); struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, @@ -221,32 +230,67 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); +bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, + const struct net *net, unsigned short port, + int l3mdev); + +struct inet_bind2_bucket * +inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, + struct inet_bind_hashbucket *head, + unsigned short port, int l3mdev, + const struct sock *sk); + +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, + struct inet_bind2_bucket *tb); + +struct inet_bind2_bucket * +inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, + const struct net *net, + unsigned short port, int l3mdev, + const struct sock *sk); + +bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, + const struct net *net, unsigned short port, + int l3mdev, const struct sock *sk); + static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, const u32 bhash_size) { return (lport + net_hash_mix(net)) & 
(bhash_size - 1); } -void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum); - -/* These can have wildcards, don't try too hard. */ -static inline u32 inet_lhashfn(const struct net *net, const unsigned short num) +static inline struct inet_bind_hashbucket * +inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, + const struct net *net, unsigned short port) { - return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1); -} + u32 hash; -static inline int inet_sk_listen_hashfn(const struct sock *sk) -{ - return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port); + else +#endif + hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port); + return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +struct inet_bind_hashbucket * +inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); + +/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or + * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's + * rcv_saddr field should already have been updated when this is called. + */ +int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); + +void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2, unsigned short port); + /* Caller must disable local BH processing. 
*/ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); -void inet_hashinfo_init(struct inet_hashinfo *h); void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, @@ -295,7 +339,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif -#if (BITS_PER_LONG == 64) #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ @@ -307,24 +350,20 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_addrpair == (__cookie)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#else /* 32-bit arch */ -#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_daddr == (__saddr)) && \ - ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#endif /* 64-bit arch */ +static inline bool inet_match(struct net *net, const struct sock *sk, + const __addrpair cookie, const __portpair ports, + int dif, int sdif) +{ + if (!net_eq(sock_net(sk), net) || + sk->sk_portpair != ports || + sk->sk_addrpair != cookie) + return false; + + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, 
READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); +} /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM @@ -425,7 +464,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9e1111f5915b..bf5654ce711e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -116,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif - return sk->sk_bound_dev_if; + return bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) @@ -131,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!net->ipv4.sysctl_tcp_l3mdev_accept) + if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif @@ 
-147,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, return bound_dev_if == dif || bound_dev_if == sdif; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_cork { unsigned int flags; __be32 addr; @@ -252,6 +265,11 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) +static inline bool sk_is_inet(struct sock *sk) +{ + return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; +} + /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket @@ -368,8 +386,20 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } +static inline bool inet_addr_valid_or_nonlocal(struct net *net, + struct inet_sock *inet, + __be32 addr, + int addr_type) +{ + return inet_can_nonlocal_bind(net, inet) || + addr == htonl(INADDR_ANY) || + addr_type == RTN_LOCAL || + addr_type == RTN_MULTICAST || + addr_type == RTN_BROADCAST; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index dfd919b3119e..5b47545f22d3 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -65,10 +65,9 @@ struct inet_timewait_sock { /* these three are in inet_sock */ __be16 tw_sport; /* And these are ours. 
*/ - unsigned int tw_kill : 1, - tw_transparent : 1, + unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 2, /* 2 bits hole */ + tw_pad : 3, /* 3 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 3f45ba37a2c6..781d2d8b2f29 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -35,7 +35,7 @@ struct ioam6_schema { int len; __be32 hdr; - u8 data[0]; + u8 data[]; }; struct ioam6_pernet_data { diff --git a/include/net/ip.h b/include/net/ip.h index b71e88507c4a..038097c2a152 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -56,6 +56,7 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) +#define IPSKB_NOPOLICY BIT(8) u16 frag_max_size; }; @@ -93,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; - ipcm->oif = inet->sk.sk_bound_dev_if; + ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; } @@ -356,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else @@ -383,7 +384,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? 
(mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -445,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; @@ -517,7 +518,6 @@ void ip_dst_metrics_put(struct dst_entry *dst) kfree(p); } -u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, @@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } @@ -568,14 +567,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - - return csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), proto, 0); -} - /* * Map a multicast IP onto multicast MAC for type ethernet. 
*/ @@ -721,7 +712,7 @@ int ip_forward(struct sk_buff *skb); */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag); + __be32 daddr, struct rtable *rt); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); @@ -752,8 +743,12 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); +int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +int do_ip_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, @@ -791,5 +786,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index b3f4eaa88672..c8a96b888277 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - - return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), proto, 0)); -} - static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, @@ -65,15 +57,9 @@ static inline void __tcp_v6_send_check(struct 
sk_buff *skb, { struct tcphdr *th = tcp_hdr(skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 83b8070d1cc9..6268963d9599 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -20,6 +20,7 @@ #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> +#include <uapi/linux/bpf.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -189,14 +190,16 @@ struct fib6_info { u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; + + u8 offload; + u8 trap; + u8 offload_failed; + u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, - offload:1, - trap:1, - offload_failed:1, - unused:1; + unused:4; struct rcu_head rcu; struct nexthop *nh; @@ -281,7 +284,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; @@ -366,9 +369,8 @@ struct rt6_statistics { __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ - /* The following stats are not protected by any lock */ + /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ - atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define 
RTN_TL_ROOT 0x0001 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5efd0b71dc67..035d61d50a98 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -2,6 +2,16 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H +#include <net/addrconf.h> +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/nexthop.h> + struct route_info { __u8 type; __u8 length; @@ -19,16 +29,6 @@ struct route_info { __u8 prefix[]; /* 0,8 or 16 */ }; -#include <net/addrconf.h> -#include <net/flow.h> -#include <net/ip6_fib.h> -#include <net/sock.h> -#include <net/lwtunnel.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/route.h> -#include <net/nexthop.h> - #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 @@ -263,19 +263,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { - unsigned int mtu; - - struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 
inet6_sk(skb->sk) : NULL; + const struct dst_entry *dst = skb_dst(skb); + unsigned int mtu; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { - mtu = READ_ONCE(skb_dst(skb)->dev->mtu); - mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); - } else - mtu = dst_mtu(skb_dst(skb)); - + mtu = READ_ONCE(dst->dev->mtu); + mtu -= lwtunnel_headroom(dst->lwtstate, mtu); + } else { + mtu = dst_mtu(dst); + } return mtu; } diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 028eaea1c854..74b369bddf49 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -46,6 +46,7 @@ struct __ip6_tnl_parm { struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ + netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ @@ -57,7 +58,7 @@ struct ip6_tnl { /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int hlen; /* tun_hlen + encap_hlen */ int tun_hlen; /* Precalculated header length */ int encap_hlen; /* Encap header length (FOU,GUE) */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3417ba2d27ad..a378eff827c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -17,6 +17,7 @@ #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> @@ -24,7 +25,7 @@ struct fib_config { u8 fc_dst_len; - u8 fc_tos; + dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; @@ -79,6 +80,7 @@ struct fnhe_hash_bucket { struct fib_nh_common { struct net_device *nhc_dev; + netdevice_tracker nhc_dev_tracker; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; @@ -111,6 +113,7 @@ struct 
fib_nh { int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev +#define fib_nh_dev_tracker nh_common.nhc_dev_tracker #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate @@ -209,7 +212,7 @@ struct fib_rt_info { u32 tb_id; __be32 dst; int dst_len; - u8 tos; + dscp_t dscp; u8 type; u8 offload:1, trap:1, @@ -222,7 +225,7 @@ struct fib_entry_notifier_info { u32 dst; int dst_len; struct fib_info *fi; - u8 tos; + dscp_t dscp; u8 type; u32 tb_id; }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bc3b13ec93c9..fca357679816 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -54,6 +54,7 @@ struct ip_tunnel_key { __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; + __u8 flow_flags; }; /* Flags for ip_tunnel_info mode. */ @@ -104,7 +105,10 @@ struct metadata_dst; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error @@ -113,7 +117,7 @@ struct ip_tunnel { /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ - u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ @@ -240,11 +244,19 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id) static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif, - __u32 mark, __u32 tun_inner_hash) + __be32 key, __u8 tos, + struct net *net, int oif, + __u32 mark, __u32 tun_inner_hash, + __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_oif = oif; + + if (oif) { + fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif); + /* Legacy 
VRF/l3mdev use case */ + fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; + } + fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_tos = tos; @@ -252,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; + fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); @@ -289,6 +302,12 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); +bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *encap); + +void ip_tunnel_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms); + extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); @@ -377,9 +396,11 @@ static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP)) + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IP)) return iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (payload_protocol == htons(ETH_P_IPV6)) return ipv6_get_dsfield((const struct ipv6hdr *)iph); else return 0; @@ -388,9 +409,11 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP)) + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IP)) return iph->ttl; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (payload_protocol == htons(ETH_P_IPV6)) return ((const struct ipv6hdr *)iph)->hop_limit; else return 0; @@ -446,8 +469,8 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) struct 
pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); } else { diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index fee6fc451597..8660a2a6d1fc 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -2,11 +2,13 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include <linux/skbuff.h> #include <linux/types.h> #define IPCOMP_SCRATCH_SIZE 65400 struct crypto_comp; +struct ip_comp_hdr; struct ipcomp_data { u16 threshold; @@ -20,7 +22,7 @@ struct xfrm_state; int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb); int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb); void ipcomp_destroy(struct xfrm_state *x); -int ipcomp_init_state(struct xfrm_state *x); +int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack); static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb) { diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h index e3534299bd2a..8276897d0c2e 100644 --- a/include/net/ipconfig.h +++ b/include/net/ipconfig.h @@ -7,6 +7,8 @@ /* The following are initdata: */ +#include <linux/types.h> + extern int ic_proto_enabled; /* Protocols enabled (see IC_xxx) */ extern int ic_set_manually; /* IPconfig parameters set manually */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c19bf51ded1d..37943ba3a73c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,12 +15,14 @@ #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> -#include <net/ndisc.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> +struct ip_tunnel_info; + #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 @@ -149,6 +151,17 @@ struct frag_hdr { __be32 
identification; }; +/* + * Jumbo payload option, as described in RFC 2675 2. + */ +struct hop_jumbo_hdr { + u8 nexthdr; + u8 hdrlen; + u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ + u8 tlv_len; /* 4 */ + __be32 jumbo_payload_len; +}; + #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -345,9 +358,9 @@ struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; + __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; - __u16 gso_size; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) @@ -391,17 +404,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) kfree_rcu(opt, rcu); } +#if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { - if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) + if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && + READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? 
: ERR_PTR(-ENOENT); return NULL; } +#endif struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, @@ -435,14 +451,55 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt); +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt); + +static inline struct ipv6_txoptions * +ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) +{ + if (!opt) + return NULL; + return __ipv6_fixup_options(opt_space, opt); +} bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head. + * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. 
+ */ +static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + const struct hop_jumbo_hdr *jhdr; + const struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) + return 0; + + if (skb->protocol != htons(ETH_P_IPV6)) + return 0; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return 0; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return 0; + + jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || + jhdr->nexthdr != IPPROTO_TCP) + return 0; + return jhdr->nexthdr; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special @@ -965,6 +1022,11 @@ static inline u8 ip6_tclass(__be32 flowinfo) return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } +static inline dscp_t ip6_dscp(__be32 flowinfo) +{ + return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); +} + static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; @@ -1001,7 +1063,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, + void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags); @@ -1017,8 +1079,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + void *from, size_t length, int transhdrlen, + struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full 
*cork); @@ -1094,8 +1156,12 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, */ DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); +int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +int do_ipv6_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); @@ -1116,6 +1182,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); +void inet6_cleanup_sock(struct sock *sk); +void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, @@ -1145,7 +1213,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list); int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage __user *p); + sockptr_t optval, size_t ss_offset); #ifdef CONFIG_PROC_FS int ac6_proc_init(struct net *net); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..5052c66e22d2 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_FRAG_H #define _IPV6_FRAG_H +#include <linux/icmpv6.h> #include <linux/kernel.h> #include <net/addrconf.h> #include <net/ipv6.h> @@ -67,7 +68,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). 
*/ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 45e0339be6fa..c48186bf4737 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -81,6 +81,10 @@ struct ipv6_bpf_stub { const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); + int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); + int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index ff06246dbbb9..df85d19fbf84 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -112,10 +112,12 @@ enum iucv_tx_notify { struct iucv_sock { struct sock sk; - char src_user_id[8]; - char src_name[8]; - char dst_user_id[8]; - char dst_name[8]; + struct_group(init, + char src_user_id[8]; + char src_name[8]; + char dst_user_id[8]; + char dst_name[8]; + ); struct list_head accept_q; spinlock_t accept_q_lock; struct sock *parent; diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index e766300b3e99..3e1f76786d7b 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -16,6 +16,13 @@ * Connection state transition actions * (Fb = F bit; Pb = P bit; Xb = X bit) */ + +#include <linux/types.h> + +struct sk_buff; +struct sock; +struct timer_list; + #define LLC_CONN_AC_CLR_REMOTE_BUSY 1 #define LLC_CONN_AC_CONN_IND 2 #define LLC_CONN_AC_CONN_CONFIRM 3 diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 48f3f891b2f9..53823d61d8b6 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. 
*/ + +#include <net/llc_c_ac.h> +#include <net/llc_c_ev.h> + /* Connection component state management */ /* connection states */ #define LLC_CONN_OUT_OF_SVC 0 /* prior to allocation */ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea985aa7a6c5..2c1ea3414640 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; diff --git a/include/net/llc_s_ac.h b/include/net/llc_s_ac.h index a61b98c108ee..f71790305bc9 100644 --- a/include/net/llc_s_ac.h +++ b/include/net/llc_s_ac.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. */ + +struct llc_sap; +struct sk_buff; + /* SAP component actions */ #define SAP_ACT_UNITDATA_IND 1 #define SAP_ACT_SEND_UI 2 diff --git a/include/net/llc_s_ev.h b/include/net/llc_s_ev.h index 84db3a59ed28..fb7df1d70af3 100644 --- a/include/net/llc_s_ev.h +++ b/include/net/llc_s_ev.h @@ -13,6 +13,7 @@ */ #include <linux/skbuff.h> +#include <net/llc.h> /* Defines SAP component events */ /* Types of events (possible values in 'ev->type') */ diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index c4359e203013..ed5b2fa40d32 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -12,6 +12,12 @@ * See the GNU General Public License for more details. 
*/ +#include <linux/types.h> +#include <net/llc_s_ac.h> +#include <net/llc_s_ev.h> + +struct llc_sap_state_trans; + #define LLC_NR_SAP_STATES 2 /* size of state table */ /* structures and types */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd757f0987b0..ac2bad57933f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #ifndef MAC80211_H @@ -18,6 +18,7 @@ #include <linux/if_ether.h> #include <linux/skbuff.h> #include <linux/ieee80211.h> +#include <linux/lockdep.h> #include <net/cfg80211.h> #include <net/codel.h> #include <net/ieee80211_radiotap.h> @@ -125,6 +126,22 @@ * via the usual ieee80211_tx_dequeue). */ +/** + * DOC: HW timestamping + * + * Timing Measurement and Fine Timing Measurement require accurate timestamps + * of the action frames TX/RX and their respective acks. + * + * To report hardware timestamps for Timing Measurement or Fine Timing + * Measurement frame RX, the low level driver should set the SKB's hwtstamp + * field to the frame RX timestamp and report the ack TX timestamp in the + * ieee80211_rx_status struct. + * + * Similarly, To report hardware timestamps for Timing Measurement or Fine + * Timing Measurement frame TX, the driver should set the SKB's hwtstamp field + * to the frame TX timestamp and report the ack RX timestamp in the + * ieee80211_tx_status struct. + */ struct device; /** @@ -261,11 +278,13 @@ enum ieee80211_chanctx_switch_mode { * done. 
* * @vif: the vif that should be switched from old_ctx to new_ctx + * @link_conf: the link conf that's switching * @old_ctx: the old context to which the vif was assigned * @new_ctx: the new context to which the vif must be assigned */ struct ieee80211_vif_chanctx_switch { struct ieee80211_vif *vif; + struct ieee80211_bss_conf *link_conf; struct ieee80211_chanctx_conf *old_ctx; struct ieee80211_chanctx_conf *new_ctx; }; @@ -273,8 +292,8 @@ struct ieee80211_vif_chanctx_switch { /** * enum ieee80211_bss_change - BSS change notification flags * - * These flags are used with the bss_info_changed() callback - * to indicate which BSS parameter changed. + * These flags are used with the bss_info_changed(), link_info_changed() + * and vif_cfg_changed() callbacks to indicate which parameter(s) changed. * * @BSS_CHANGED_ASSOC: association status changed (associated/disassociated), * also implies a change in the AID. @@ -513,8 +532,9 @@ struct ieee80211_fils_discovery { * This structure keeps information about a BSS (and an association * to that BSS) that can change during the lifetime of the BSS. 
* + * @addr: (link) address used locally + * @link_id: link ID, or 0 for non-MLO * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE - * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK * @uora_exists: is the UORA element advertised by AP * @ack_enabled: indicates support to receive a multi-TID that solicits either * ACK, BACK or both @@ -527,11 +547,6 @@ struct ieee80211_fils_discovery { * mode only, set if the AP advertises TWT responder role) * @twt_protected: does this BSS support protected TWT frames * @twt_broadcast: does this BSS support broadcast TWT - * @assoc: association status - * @ibss_joined: indicates whether this station is part of an IBSS - * or not - * @ibss_creator: indicates if a new IBSS network is being created - * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection * @use_short_preamble: use 802.11b short preamble * @use_short_slot: use short slot time (only relevant for ERP) @@ -552,6 +567,8 @@ struct ieee80211_fils_discovery { * IMPORTANT: These three sync_* parameters would possibly be out of sync * by the time the driver will use them. The synchronized view is currently * guaranteed only in certain callbacks. + * Note also that this is not used with MLD associations, mac80211 doesn't + * know how to track beacons for all of the links for this. * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp * @basic_rates: bitmap of basic rates, each bit stands for an @@ -577,21 +594,7 @@ struct ieee80211_fils_discovery { * threshold event and can't be enabled simultaneously with it. * @cqm_rssi_high: Connection quality monitor RSSI upper threshold. * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis - * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The - * may filter ARP queries targeted for other addresses than listed here. 
- * The driver must allow ARP queries targeted for all address listed here - * to pass through. An empty list implies no ARP queries need to pass. - * @arp_addr_cnt: Number of addresses currently on the list. Note that this - * may be larger than %IEEE80211_BSS_ARP_ADDR_LIST_LEN (the arp_addr_list - * array size), it's up to the driver what to do in that case. * @qos: This is a QoS-enabled BSS. - * @idle: This interface is idle. There's also a global idle flag in the - * hardware config which may be more appropriate depending on what - * your driver/device needs to do. - * @ps: power-save mode (STA only). This flag is NOT affected by - * offchannel/dynamic_ps operations. - * @ssid: The SSID of the current vif. Valid in AP and IBSS mode. - * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm. INT_MIN means not configured. * @txpower_type: TX power adjustment used to control per packet Transmit @@ -629,16 +632,31 @@ struct ieee80211_fils_discovery { * @fils_discovery: FILS discovery configuration * @unsol_bcast_probe_resp_interval: Unsolicited broadcast probe response * interval. - * @s1g: BSS is S1G BSS (affects Association Request format). * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz * @tx_pwr_env: transmit power envelope array of BSS. * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. + * @eht_support: does this BSS support EHT + * @csa_active: marks whether a channel switch is going on. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. + * @mu_mimo_owner: indicates interface owns MU-MIMO capability + * @chanctx_conf: The channel context this interface is assigned to, or %NULL + * when it is not assigned. 
This pointer is RCU-protected due to the TX + * path needing to access it; even though the netdev carrier will always + * be off when it is %NULL there can still be races and packets could be + * processed after it switches back to %NULL. + * @color_change_active: marks whether a color change is ongoing. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. + * @color_change_color: the bss color that will be used after the change. */ struct ieee80211_bss_conf { const u8 *bssid; + unsigned int link_id; + u8 addr[ETH_ALEN] __aligned(2); u8 htc_trig_based_pkt_ext; bool uora_exists; u8 uora_ocw_range; @@ -648,10 +666,6 @@ struct ieee80211_bss_conf { bool twt_responder; bool twt_protected; bool twt_broadcast; - /* association related data */ - bool assoc, ibss_joined; - bool ibss_creator; - u16 aid; /* erp related data */ bool use_cts_prot; bool use_short_preamble; @@ -673,13 +687,7 @@ struct ieee80211_bss_conf { s32 cqm_rssi_high; struct cfg80211_chan_def chandef; struct ieee80211_mu_group_data mu_group; - __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; - int arp_addr_cnt; bool qos; - bool idle; - bool ps; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - size_t ssid_len; bool hidden_ssid; int txpower; enum nl80211_tx_power_setting txpower_type; @@ -704,12 +712,19 @@ struct ieee80211_bss_conf { struct cfg80211_he_bss_color he_bss_color; struct ieee80211_fils_discovery fils_discovery; u32 unsol_bcast_probe_resp_interval; - bool s1g; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; u8 tx_pwr_env_num; u8 pwr_reduction; + bool eht_support; + + bool csa_active; + bool mu_mimo_owner; + struct ieee80211_chanctx_conf __rcu *chanctx_conf; + + bool color_change_active; + u8 color_change_color; }; /** @@ -868,6 +883,14 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_DONT_REORDER: This frame should not be reordered * 
relative to other frames that have this flag set, independent * of their QoS TID or other priority field values. + * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally + * for sequence number assignment + * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this + * frame should be transmitted on the specific link. This really is + * only relevant for frames that do not have data present, and is + * also not used for 802.3 format frames. Note that even if the frame + * is on a specific link, address translation might still apply if + * it's intended for an MLD. * * These flags are used in tx_info->control.flags. */ @@ -881,6 +904,24 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6), IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), + IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), + IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, +}; + +#define IEEE80211_LINK_UNSPECIFIED 0xf +#define IEEE80211_TX_CTRL_MLO_LINK_UNSPEC \ + u32_encode_bits(IEEE80211_LINK_UNSPECIFIED, \ + IEEE80211_TX_CTRL_MLO_LINK) + +/** + * enum mac80211_tx_status_flags - flags to describe transmit status + * + * @IEEE80211_TX_STATUS_ACK_SIGNAL_VALID: ACK signal is valid + * + * These flags are used in tx_info->status.flags. + */ +enum mac80211_tx_status_flags { + IEEE80211_TX_STATUS_ACK_SIGNAL_VALID = BIT(0), }; /* @@ -1019,7 +1060,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * (3) TX status information - driver tells mac80211 what happened * * @flags: transmit info flags, defined above - * @band: the band to transmit on (use for checking for races) + * @band: the band to transmit on (use e.g. 
for checking for races), + * not valid if the interface is an MLD since we won't know which + * link the frame will be transmitted on * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC * @ack_frame_id: internal frame ID for TX status, used internally * @tx_time_est: TX time estimate in units of 4us, used internally @@ -1046,7 +1089,7 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @status.antenna: (legacy, kept only for iwlegacy) * @status.tx_time: airtime consumed for transmission; note this is only * used for WMM AC, not for airtime fairness - * @status.is_valid_ack_signal: ACK signal is valid + * @status.flags: status flags, see &enum mac80211_tx_status_flags * @status.status_driver_data: driver use area * @ack: union part for pure ACK data * @ack.cookie: cookie for the ACK @@ -1099,8 +1142,8 @@ struct ieee80211_tx_info { u8 ampdu_len; u8 antenna; u16 tx_time; - bool is_valid_ack_signal; - void *status_driver_data[19 / sizeof(void *)]; + u8 flags; + void *status_driver_data[18 / sizeof(void *)]; } status; struct { struct ieee80211_tx_rate driver_rates[ @@ -1131,20 +1174,45 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info) return info->tx_time_est << 2; } +/*** + * struct ieee80211_rate_status - mrr stage for status path + * + * This struct is used in struct ieee80211_tx_status to provide drivers a + * dynamic way to report about used rates and power levels per packet. + * + * @rate_idx The actual used rate. + * @try_count How often the rate was tried. + * @tx_power_idx An idx into the ieee80211_hw->tx_power_levels list of the + * corresponding wifi hardware. The idx shall point to the power level + * that was used when sending the packet. 
+ */ +struct ieee80211_rate_status { + struct rate_info rate_idx; + u8 try_count; + u8 tx_power_idx; +}; + /** * struct ieee80211_tx_status - extended tx status info for rate control * * @sta: Station that the packet was transmitted for * @info: Basic tx status information * @skb: Packet skb (can be NULL if not provided by the driver) - * @rate: The TX rate that was used when sending the packet + * @rates: Mrr stages that were used when sending the packet + * @n_rates: Number of mrr stages (count of instances for @rates) * @free_list: list where processed skbs are stored to be free'd by the driver + * @ack_hwtstamp: Hardware timestamp of the received ack in nanoseconds + * Only needed for Timing measurement and Fine timing measurement action + * frames. Only reported by devices that have timestamping enabled. */ struct ieee80211_tx_status { struct ieee80211_sta *sta; struct ieee80211_tx_info *info; struct sk_buff *skb; - struct rate_info *rate; + struct ieee80211_rate_status *rates; + ktime_t ack_hwtstamp; + u8 n_rates; + struct list_head *free_list; }; @@ -1188,9 +1256,9 @@ static inline struct ieee80211_rx_status *IEEE80211_SKB_RXCB(struct sk_buff *skb * in the TX status but the rate control information (it does clear * the count since you need to fill that in anyway). * - * NOTE: You can only use this function if you do NOT use - * info->driver_data! Use info->rate_driver_data - * instead if you need only the less space that allows. + * NOTE: While the rates array is kept intact, this will wipe all of the + * driver_data fields in info, so it's up to the driver to restore + * any fields it needs after calling this helper. 
*/ static inline void ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) @@ -1205,12 +1273,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) /* clear the rate counts */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) info->status.rates[i].count = 0; - - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&info->status.ampdu_ack_len, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + memset_after(&info->status, 0, rates); } @@ -1385,6 +1448,9 @@ enum mac80211_rx_encoding { * (TSF) timer when the first data symbol (MPDU) arrived at the hardware. * @boottime_ns: CLOCK_BOOTTIME timestamp the frame was received at, this is * needed only for beacons and probe responses that update the scan cache. + * @ack_tx_hwtstamp: Hardware timestamp for the ack TX in nanoseconds. Only + * needed for Timing measurement and Fine timing measurement action frames. + * Only reported by devices that have timestamping enabled. * @device_timestamp: arbitrary timestamp for the device, mac80211 doesn't use * it but can store it and pass it back to the driver for synchronisation * @band: the active band when this frame was received @@ -1415,10 +1481,17 @@ enum mac80211_rx_encoding { * each A-MPDU but the same for each subframe within one A-MPDU * @ampdu_delimiter_crc: A-MPDU delimiter CRC * @zero_length_psdu_type: radiotap type of the 0-length PSDU + * @link_valid: if the link which is identified by @link_id is valid. This flag + * is set only when connection is MLO. + * @link_id: id of the link used to receive the packet. This is used along with + * @link_valid. 
*/ struct ieee80211_rx_status { u64 mactime; - u64 boottime_ns; + union { + u64 boottime_ns; + ktime_t ack_tx_hwtstamp; + }; u32 device_timestamp; u32 ampdu_reference; u32 flag; @@ -1436,6 +1509,7 @@ struct ieee80211_rx_status { s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 ampdu_delimiter_crc; u8 zero_length_psdu_type; + u8 link_valid:1, link_id:4; }; static inline u32 @@ -1674,37 +1748,75 @@ enum ieee80211_offload_flags { }; /** + * struct ieee80211_vif_cfg - interface configuration + * @assoc: association status + * @ibss_joined: indicates whether this station is part of an IBSS or not + * @ibss_creator: indicates if a new IBSS network is being created + * @ps: power-save mode (STA only). This flag is NOT affected by + * offchannel/dynamic_ps operations. + * @aid: association ID number, valid only when @assoc is true + * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The + * may filter ARP queries targeted for other addresses than listed here. + * The driver must allow ARP queries targeted for all address listed here + * to pass through. An empty list implies no ARP queries need to pass. + * @arp_addr_cnt: Number of addresses currently on the list. Note that this + * may be larger than %IEEE80211_BSS_ARP_ADDR_LIST_LEN (the arp_addr_list + * array size), it's up to the driver what to do in that case. + * @ssid: The SSID of the current vif. Valid in AP and IBSS mode. + * @ssid_len: Length of SSID given in @ssid. + * @s1g: BSS is S1G BSS (affects Association Request format). + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. 
+ * @ap_addr: AP MLD address, or BSSID for non-MLO connections + * (station mode only) + */ +struct ieee80211_vif_cfg { + /* association related data */ + bool assoc, ibss_joined; + bool ibss_creator; + bool ps; + u16 aid; + + __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; + int arp_addr_cnt; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + size_t ssid_len; + bool s1g; + bool idle; + u8 ap_addr[ETH_ALEN] __aligned(2); +}; + +/** * struct ieee80211_vif - per-interface data * * Data in this structure is continually present for driver * use during the life of a virtual interface. * * @type: type of this virtual interface + * @cfg: vif configuration, see &struct ieee80211_vif_cfg * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to + * @link_conf: in case of MLD, the per-link BSS configuration, + * indexed by link ID + * @valid_links: bitmap of valid links, or 0 for non-MLO. + * @active_links: The bitmap of active links, or 0 for non-MLO. + * The driver shouldn't change this directly, but use the + * API calls meant for that purpose. * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively - * @csa_active: marks whether a channel switch is going on. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. - * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @driver_flags: flags/capabilities the driver has for this interface, * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed * at runtime, mac80211 will never touch this field - * @offloaad_flags: hardware offload capabilities/flags for this interface. + * @offload_flags: hardware offload capabilities/flags for this interface. 
* These are initialized by mac80211 before calling .add_interface, * .change_interface or .update_vif_offload and updated by the driver * within these ops, based on supported features or runtime change * restrictions. * @hw_queue: hardware queue for each AC * @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only - * @chanctx_conf: The channel context this interface is assigned to, or %NULL - * when it is not assigned. This pointer is RCU-protected due to the TX - * path needing to access it; even though the netdev carrier will always - * be off when it is %NULL there can still be races and packets could be - * processed after it switches back to %NULL. * @debugfs_dir: debugfs dentry, can be used by drivers to create own per * interface debug files. Note that it will be NULL for the virtual * monitor interface (if that is requested.) @@ -1719,27 +1831,22 @@ enum ieee80211_offload_flags { * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 encapsulation offload flags, see * &enum ieee80211_offload_flags. - * @color_change_active: marks whether a color change is ongoing. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. - * @color_change_color: the bss color that will be used after the change. * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. 
*/ struct ieee80211_vif { enum nl80211_iftype type; + struct ieee80211_vif_cfg cfg; struct ieee80211_bss_conf bss_conf; + struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; + u16 valid_links, active_links; u8 addr[ETH_ALEN] __aligned(2); bool p2p; - bool csa_active; - bool mu_mimo_owner; u8 cab_queue; u8 hw_queue[IEEE80211_NUM_ACS]; struct ieee80211_txq *txq; - struct ieee80211_chanctx_conf __rcu *chanctx_conf; - u32 driver_flags; u32 offload_flags; @@ -1752,15 +1859,18 @@ struct ieee80211_vif { bool txqs_stopped[IEEE80211_NUM_ACS]; - bool color_change_active; - u8 color_change_color; - struct ieee80211_vif *mbssid_tx_vif; /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; +#define for_each_vif_active_link(vif, link, link_id) \ + for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ + if ((!(vif)->active_links || \ + (vif)->active_links & BIT(link_id)) && \ + (link = rcu_dereference((vif)->link_conf[link_id]))) + static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { #ifdef CONFIG_MAC80211_MESH @@ -1793,6 +1903,19 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); /** + * lockdep_vif_mutex_held - for lockdep checks on link pointers + * @vif: the interface to check + */ +static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) +{ + return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx); +} + +#define link_conf_dereference_protected(vif, link_id) \ + rcu_dereference_protected((vif)->link_conf[link_id], \ + lockdep_vif_mutex_held(vif)) + +/** * enum ieee80211_key_flags - key flags * * These flags are used for communication about keys between the driver @@ -1873,6 +1996,7 @@ enum ieee80211_key_flags { * - Temporal Authenticator Rx MIC Key (64 bits) * @icv_len: The ICV length for this key type * @iv_len: The IV length for this key type + * @link_id: the link ID for MLO, or -1 for non-MLO or pairwise 
keys */ struct ieee80211_key_conf { atomic64_t tx_pn; @@ -1882,6 +2006,7 @@ struct ieee80211_key_conf { u8 hw_key_idx; s8 keyidx; u16 flags; + s8 link_id; u8 keylen; u8 key[]; }; @@ -1931,36 +2056,6 @@ struct ieee80211_key_seq { }; /** - * struct ieee80211_cipher_scheme - cipher scheme - * - * This structure contains a cipher scheme information defining - * the secure packet crypto handling. - * - * @cipher: a cipher suite selector - * @iftype: a cipher iftype bit mask indicating an allowed cipher usage - * @hdr_len: a length of a security header used the cipher - * @pn_len: a length of a packet number in the security header - * @pn_off: an offset of pn from the beginning of the security header - * @key_idx_off: an offset of key index byte in the security header - * @key_idx_mask: a bit mask of key_idx bits - * @key_idx_shift: a bit shift needed to get key_idx - * key_idx value calculation: - * (sec_header_base[key_idx_off] & key_idx_mask) >> key_idx_shift - * @mic_len: a mic length in bytes - */ -struct ieee80211_cipher_scheme { - u32 cipher; - u16 iftype; - u8 hdr_len; - u8 pn_len; - u8 pn_off; - u8 key_idx_off; - u8 key_idx_mask; - u8 key_idx_shift; - u8 mic_len; -}; - -/** * enum set_key_cmd - key command * * Used with the set_key() callback in &struct ieee80211_ops, this @@ -1999,6 +2094,7 @@ enum ieee80211_sta_state { * @IEEE80211_STA_RX_BW_80: station can receive up to 80 MHz * @IEEE80211_STA_RX_BW_160: station can receive up to 160 MHz * (including 80+80 MHz) + * @IEEE80211_STA_RX_BW_320: station can receive up to 320 MHz * * Implementation note: 20 must be zero to be initialized * correctly, the values must be sorted. 
@@ -2008,6 +2104,7 @@ enum ieee80211_sta_rx_bandwidth { IEEE80211_STA_RX_BW_40, IEEE80211_STA_RX_BW_80, IEEE80211_STA_RX_BW_160, + IEEE80211_STA_RX_BW_320, }; /** @@ -2047,6 +2144,77 @@ struct ieee80211_sta_txpwr { }; /** + * struct ieee80211_sta_aggregates - info that is aggregated from active links + * + * Used for any per-link data that needs to be aggregated and updated in the + * main &struct ieee80211_sta when updated or the active links change. + * + * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes. + * This field is always valid for packets with a VHT preamble. + * For packets with a HT preamble, additional limits apply: + * + * * If the skb is transmitted as part of a BA agreement, the + * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. + * * If the skb is not part of a BA agreement, the A-MSDU maximal + * size is min(max_amsdu_len, 7935) bytes. + * + * Both additional HT limits must be enforced by the low level + * driver. This is defined by the spec (IEEE 802.11-2012 section + * 8.3.2.2 NOTE 2). + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. + * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID + */ +struct ieee80211_sta_aggregates { + u16 max_amsdu_len; + + u16 max_rc_amsdu_len; + u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; +}; + +/** + * struct ieee80211_link_sta - station Link specific info + * All link specific info for a STA link for a non MLD STA(single) + * or a MLD STA(multiple entries) are stored here. + * + * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr + * in ieee80211_sta. 
For MLO Link STA this addr can be same or different + * from addr in ieee80211_sta (representing MLD STA addr) + * @link_id: the link ID for this link STA (0 for deflink) + * @smps_mode: current SMPS mode (off, static or dynamic) + * @supp_rates: Bitmap of supported rates + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @he_cap: HE capabilities of this STA + * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities + * @eht_cap: EHT capabilities of this STA + * @bandwidth: current bandwidth the station can receive with + * @rx_nss: in HT/VHT, the maximum number of spatial streams the + * station can receive at the moment, changed by operating mode + * notifications and capabilities. The value is only valid after + * the station moves to associated state. + * @txpwr: the station tx power configuration + * + */ +struct ieee80211_link_sta { + u8 addr[ETH_ALEN]; + u8 link_id; + enum ieee80211_smps_mode smps_mode; + + u32 supp_rates[NUM_NL80211_BANDS]; + struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; + struct ieee80211_sta_he_cap he_cap; + struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; + + struct ieee80211_sta_aggregates agg; + + u8 rx_nss; + enum ieee80211_sta_rx_bandwidth bandwidth; + struct ieee80211_sta_txpwr txpwr; +}; + +/** * struct ieee80211_sta - station table entry * * A station table entry represents a station we are possibly @@ -2055,14 +2223,11 @@ struct ieee80211_sta_txpwr { * either be protected by rcu_read_lock() explicitly or implicitly, * or you must take good care to not use such a pointer after a * call to your sta_remove callback that removed it. 
+ * This also represents the MLD STA in case of MLO association + * and holds pointers to various link STA's * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP - * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own capabilities - * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities - * @he_cap: HE capabilities of this STA - * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU * that this station is allowed to transmit to us. * Can be modified by driver. @@ -2074,75 +2239,81 @@ struct ieee80211_sta_txpwr { * if wme is supported. The bits order is like in * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. - * @bandwidth: current bandwidth the station can receive with - * @rx_nss: in HT/VHT, the maximum number of spatial streams the - * station can receive at the moment, changed by operating mode - * notifications and capabilities. The value is only valid after - * the station moves to associated state. - * @smps_mode: current SMPS mode (off, static or dynamic) * @rates: rate control selection table * @tdls: indicates whether the STA is a TDLS peer * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only * valid if the STA is a TDLS peer in the first place. * @mfp: indicates whether the STA uses management frame protection or not. + * @mlo: indicates whether the STA is MLO station. * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. + * @cur: currently valid data as aggregated from the active links + * For non MLO STA it will point to the deflink data. For MLO STA + * ieee80211_sta_recalc_aggregates() must be called to update it. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. 
- * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. - * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID - * @txpwr: the station tx power configuration * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames + * @deflink: This holds the default link STA information, for non MLO STA all link + * specific STA information is accessed through @deflink or through + * link[0] which points to address of @deflink. For MLO Link STA + * the first added link STA will point to deflink. + * @link: reference to Link Sta entries. For Non MLO STA, except 1st link, + * i.e link[0] all links would be assigned to NULL by default and + * would access link information via @deflink or link[0]. For MLO + * STA, first link STA being added will point its link pointer to + * @deflink address and remaining would be allocated and the address + * would be assigned to link[link_id] where link_id is the id assigned + * by the AP. + * @valid_links: bitmap of valid links, or 0 for non-MLO */ struct ieee80211_sta { - u32 supp_rates[NUM_NL80211_BANDS]; u8 addr[ETH_ALEN]; u16 aid; - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_sta_vht_cap vht_cap; - struct ieee80211_sta_he_cap he_cap; - struct ieee80211_he_6ghz_capa he_6ghz_capa; u16 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; - u8 rx_nss; - enum ieee80211_sta_rx_bandwidth bandwidth; - enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_rates __rcu *rates; bool tdls; bool tdls_initiator; bool mfp; + bool mlo; u8 max_amsdu_subframes; - /** - * @max_amsdu_len: - * indicates the maximal length of an A-MSDU in bytes. - * This field is always valid for packets with a VHT preamble. - * For packets with a HT preamble, additional limits apply: - * - * * If the skb is transmitted as part of a BA agreement, the - * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. 
- * * If the skb is not part of a BA agreement, the A-MSDU maximal - * size is min(max_amsdu_len, 7935) bytes. - * - * Both additional HT limits must be enforced by the low level - * driver. This is defined by the spec (IEEE 802.11-2012 section - * 8.3.2.2 NOTE 2). - */ - u16 max_amsdu_len; + struct ieee80211_sta_aggregates *cur; + bool support_p2p_ps; - u16 max_rc_amsdu_len; - u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; - struct ieee80211_sta_txpwr txpwr; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; + u16 valid_links; + struct ieee80211_link_sta deflink; + struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; +#ifdef CONFIG_LOCKDEP +bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta); +#else +static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) +{ + return true; +} +#endif + +#define link_sta_dereference_protected(sta, link_id) \ + rcu_dereference_protected((sta)->link[link_id], \ + lockdep_sta_mutex_held(sta)) + +#define for_each_sta_active_link(vif, sta, link_sta, link_id) \ + for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ + if ((!(vif)->active_links || \ + (vif)->active_links & BIT(link_id)) && \ + ((link_sta) = link_sta_dereference_protected(sta, link_id))) + /** * enum sta_notify_cmd - sta notify command * @@ -2422,6 +2593,12 @@ struct ieee80211_txq { * usage and 802.11 frames with %RX_FLAG_ONLY_MONITOR set for monitor to * the stack. * + * @IEEE80211_HW_DETECTS_COLOR_COLLISION: HW/driver has support for BSS color + * collision detection and doesn't need it in software. + * + * @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting + * multicast frames on all links, mac80211 should not do that. 
+ * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2477,6 +2654,8 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD, IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP, + IEEE80211_HW_DETECTS_COLOR_COLLISION, + IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2592,9 +2771,6 @@ enum ieee80211_hw_flags { * deliver to a WMM STA during any Service Period triggered by the WMM STA. * Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values. * - * @n_cipher_schemes: a size of an array of cipher schemes definitions. - * @cipher_schemes: a pointer to an array of cipher scheme definitions - * supported by HW. * @max_nan_de_entries: maximum number of NAN DE functions supported by the * device. * @@ -2606,6 +2782,12 @@ enum ieee80211_hw_flags { * refilling deficit of each TXQ. * * @max_mtu: the max mtu could be set. + * + * @tx_power_levels: a list of power levels supported by the wifi hardware. + * The power levels can be specified either as integer or fractions. + * The power level at idx 0 shall be the maximum positive power level. + * + * @max_txpwr_levels_idx: the maximum valid idx of 'tx_power_levels' list. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2638,12 +2820,12 @@ struct ieee80211_hw { netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; - u8 n_cipher_schemes; - const struct ieee80211_cipher_scheme *cipher_schemes; u8 max_nan_de_entries; u8 tx_sk_pacing_shift; u8 weight_multiplier; u32 max_mtu; + const s8 *tx_power_levels; + u8 max_txpwr_levels_idx; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3469,6 +3651,22 @@ struct ieee80211_prep_tx_info { * for association indication. The @changed parameter indicates which * of the bss parameters has changed when a call is made. The callback * can sleep. 
+ * Note: this callback is called if @vif_cfg_changed or @link_info_changed + * are not implemented. + * + * @vif_cfg_changed: Handler for configuration requests related to interface + * (MLD) parameters from &struct ieee80211_vif_cfg that vary during the + * lifetime of the interface (e.g. assoc status, IP addresses, etc.) + * The @changed parameter indicates which value changed. + * The callback can sleep. + * + * @link_info_changed: Handler for configuration requests related to link + * parameters from &struct ieee80211_bss_conf that are related to an + * individual link. e.g. legacy/HT/VHT/... rate information. + * The @changed parameter indicates which value changed, and the @link_id + * parameter indicates the link ID. Note that the @link_id will be 0 for + * non-MLO connections. + * The callback can sleep. * * @prepare_multicast: Prepare for multicast filter configuration. * This callback is optional, and its return value is passed @@ -3944,6 +4142,28 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. + * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to + * resolve a path for hardware flow offloading + * @change_vif_links: Change the valid links on an interface, note that while + * removing the old link information is still valid (link_conf pointer), + * but may immediately disappear after the function returns. 
The old or + * new links bitmaps may be 0 if going from/to a non-MLO situation. + * The @old array contains pointers to the old bss_conf structures + * that were already removed, in case they're needed. + * This callback can sleep. + * @change_sta_links: Change the valid links of a station, similar to + * @change_vif_links. This callback can sleep. + * Note that a sta can also be inserted or removed with valid links, + * i.e. passed to @sta_add/@sta_state with sta->valid_links not zero. + * In fact, cannot change from having valid_links and not having them. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3967,10 +4187,19 @@ struct ieee80211_ops { void (*bss_info_changed)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, - u32 changed); + u64 changed); + void (*vif_cfg_changed)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u64 changed); + void (*link_info_changed)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *info, + u64 changed); - int (*start_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); - void (*stop_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*start_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); + void (*stop_ap)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); u64 (*prepare_multicast)(struct ieee80211_hw *hw, struct netdev_hw_addr_list *mc_list); @@ -4053,7 +4282,8 @@ struct ieee80211_ops { struct ieee80211_sta *sta, struct station_info *sinfo); int (*conf_tx)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, u16 ac, + struct ieee80211_vif *vif, + unsigned int link_id, u16 ac, const struct ieee80211_tx_queue_params *params); u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4172,9 +4402,11 @@ struct ieee80211_ops { u32 changed); int 
(*assign_vif_chanctx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx); void (*unassign_vif_chanctx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx); int (*switch_vif_chanctx)(struct ieee80211_hw *hw, struct ieee80211_vif_chanctx_switch *vifs, @@ -4272,6 +4504,21 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); + int (*net_fill_forward_path)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path); + int (*change_vif_links)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]); + int (*change_sta_links)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links); }; /** @@ -4919,12 +5166,14 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); * @cntdwn_counter_offs: array of IEEE80211_MAX_CNTDWN_COUNTERS_NUM offsets * to countdown counters. This array can contain zero values which * should be ignored. + * @mbssid_off: position of the multiple bssid element */ struct ieee80211_mutable_offsets { u16 tim_offset; u16 tim_length; u16 cntdwn_counter_offs[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; + u16 mbssid_off; }; /** @@ -4933,6 +5182,7 @@ struct ieee80211_mutable_offsets { * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @offs: &struct ieee80211_mutable_offsets pointer to struct that will * receive the offsets that may be updated by the driver. 
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) * * If the driver implements beaconing modes, it must use this function to * obtain the beacon template. @@ -4949,7 +5199,8 @@ struct ieee80211_mutable_offsets { struct sk_buff * ieee80211_beacon_get_template(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_mutable_offsets *offs); + struct ieee80211_mutable_offsets *offs, + unsigned int link_id); /** * ieee80211_beacon_get_tim - beacon generation function @@ -4960,6 +5211,7 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw, * @tim_length: pointer to variable that will receive the TIM IE length, * (including the ID and length bytes!). * Set to 0 if invalid (in non-AP modes). + * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) * * If the driver implements beaconing modes, it must use this function to * obtain the beacon frame. @@ -4975,21 +5227,24 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw, */ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - u16 *tim_offset, u16 *tim_length); + u16 *tim_offset, u16 *tim_length, + unsigned int link_id); /** * ieee80211_beacon_get - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) * * See ieee80211_beacon_get_tim(). * * Return: See ieee80211_beacon_get_tim(). 
*/ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { - return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); + return ieee80211_beacon_get_tim(hw, vif, NULL, NULL, link_id); } /** @@ -5083,6 +5338,9 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: If the vif is an MLD, get a frame with the link addresses + * for the given link ID. For a link_id < 0 you get a frame with + * MLD addresses, however useful that might be. * @qos_ok: QoS NDP is acceptable to the caller, this should be set * if at all possible * @@ -5100,7 +5358,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - bool qos_ok); + int link_id, bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template @@ -5602,6 +5860,9 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. * This version can only be used while holding the wiphy mutex. + * The driver must not call this with a lock held that it can also take in + * response to callbacks from mac80211, and it must not call this within + * callbacks made by mac80211 - both would result in deadlocks. 
* * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -5616,6 +5877,24 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, void *data); /** + * ieee80211_iterate_stations - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. + * This function allows the iterator function to sleep, when the iterator + * function is atomic @ieee80211_iterate_stations_atomic can be used. + * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call, may sleep + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + +/** * ieee80211_iterate_stations_atomic - iterate stations * * This function iterates over all stations associated with a given @@ -5751,6 +6030,22 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *localaddr); /** + * ieee80211_find_sta_by_link_addrs - find STA by link addresses + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @addr: remote station's link address + * @localaddr: local link address, use %NULL for any (but avoid that) + * @link_id: pointer to obtain the link ID if the STA is found, + * may be %NULL if the link ID is not needed + * + * Obtain the STA by link address, must use RCU protection. 
+ */ +struct ieee80211_sta * +ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr, + unsigned int *link_id); + +/** * ieee80211_sta_block_awake - block station from waking up * @hw: the hardware * @pubsta: the station @@ -5826,6 +6121,19 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); /** + * ieee80211_sta_recalc_aggregates - recalculate aggregate data after a change + * @pubsta: the station + * + * Call this function after changing a per-link aggregate data as referenced in + * &struct ieee80211_sta_aggregates by accessing the agg field of + * &struct ieee80211_link_sta. + * + * With non MLO the data in deflink will be referenced directly. In that case + * there is no need to call this function. + */ +void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta); + +/** * ieee80211_sta_register_airtime - register airtime usage for a sta/tid * * Register airtime usage for a given sta on a given tid. The driver must call @@ -6021,6 +6329,16 @@ void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect); void ieee80211_resume_disconnect(struct ieee80211_vif *vif); /** + * ieee80211_hw_restart_disconnect - disconnect from AP after + * hardware restart + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Instructs mac80211 to disconnect from the AP after + * hardware restart. + */ +void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif); + +/** * ieee80211_cqm_rssi_notify - inform a configured connection quality monitoring * rssi threshold triggered * @@ -6064,15 +6382,28 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** + * ieee80211_channel_switch_disconnect - disconnect due to channel switch error + * @vif: &struct ieee80211_vif pointer from the add_interface callback. 
+ * @block_tx: if %true, do not send deauth frame. + * + * Instruct mac80211 to disconnect due to a channel switch error. The channel + * switch can request to block the tx and so, we need to make sure we do not send + * a deauth frame in this case. + */ +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, + bool block_tx); + +/** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: link ID for MLO, or 0 * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. This is useful when the driver has more information than * the stack about possible interference, for example by bluetooth. */ -void ieee80211_request_smps(struct ieee80211_vif *vif, +void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id, enum ieee80211_smps_mode smps_mode); /** @@ -6293,7 +6624,7 @@ static inline int rate_supported(struct ieee80211_sta *sta, enum nl80211_band band, int index) { - return (sta == NULL || sta->supp_rates[band] & BIT(index)); + return (sta == NULL || sta->deflink.supp_rates[band] & BIT(index)); } static inline s8 @@ -6404,6 +6735,7 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif) * ieee80211_update_mu_groups - set the VHT MU-MIMO group data * * @vif: the specified virtual interface + * @link_id: the link ID for MLO, otherwise 0 * @membership: 64 bits array - a bit is set if station is member of the group * @position: 2 bits per group id indicating the position in the group * @@ -6412,7 +6744,7 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif) * matching GroupId management frame. * Calls to this function need to be serialized with RX path. 
*/ -void ieee80211_update_mu_groups(struct ieee80211_vif *vif, +void ieee80211_update_mu_groups(struct ieee80211_vif *vif, unsigned int link_id, const u8 *membership, const u8 *position); void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, @@ -6645,6 +6977,9 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) { } +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force); + /** * ieee80211_schedule_txq - schedule a TXQ for transmission * @@ -6657,7 +6992,11 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) * The driver may call this function if it has buffered packets for * this TXQ internally. */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); +static inline void +ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + __ieee80211_schedule_txq(hw, txq, true); +} /** * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() @@ -6669,8 +7008,12 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); * The driver may set force=true if it has buffered packets for this TXQ * internally. */ -void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, - bool force); +static inline void +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, + bool force) +{ + __ieee80211_schedule_txq(hw, txq, force); +} /** * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit @@ -6818,10 +7161,11 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. 
+ * @gfp: allocation flags */ void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap); + u64 color_bitmap, gfp_t gfp); /** * ieee80211_is_tx_data - check if frame is a data frame @@ -6840,4 +7184,45 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) ieee80211_is_data(hdr->frame_control); } +/** + * ieee80211_set_active_links - set active links in client mode + * @vif: interface to set active links on + * @active_links: the new active links bitmap + * + * This changes the active links on an interface. The interface + * must be in client mode (in AP mode, all links are always active), + * and @active_links must be a subset of the vif's valid_links. + * + * If a link is switched off and another is switched on at the same + * time (e.g. active_links going from 0x1 to 0x10) then you will get + * a sequence of calls like + * - change_vif_links(0x11) + * - unassign_vif_chanctx(link_id=0) + * - change_sta_links(0x11) for each affected STA (the AP) + * (TDLS connections on now inactive links should be torn down) + * - remove group keys on the old link (link_id 0) + * - add new group keys (GTK/IGTK/BIGTK) on the new link (link_id 4) + * - change_sta_links(0x10) for each affected STA (the AP) + * - assign_vif_chanctx(link_id=4) + * - change_vif_links(0x10) + * + * Note: This function acquires some mac80211 locks and must not + * be called with any driver locks held that could cause a + * lock dependency inversion. Best call it without locks. + */ +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); + +/** + * ieee80211_set_active_links_async - asynchronously set active links + * @vif: interface to set active links on + * @active_links: the new active links bitmap + * + * See ieee80211_set_active_links() for more information, the only + * difference here is that the link change is triggered async and + * can be called in any context, but the link switch will only be + * completed after it returns. 
+ */ +void ieee80211_set_active_links_async(struct ieee80211_vif *vif, + u16 active_links); + #endif /* MAC80211_H */ diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d524ffb9eb25..bdac0ddbdcdb 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -464,6 +464,12 @@ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, * ieee802154_wake_queue - wake ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we had to stop the queue to + * avoid new skb to come during the transmission. The queue then needs to be + * woken up after the operation. + * * Drivers should use this function instead of netif_wake_queue. */ void ieee802154_wake_queue(struct ieee802154_hw *hw); @@ -472,6 +478,12 @@ void ieee802154_wake_queue(struct ieee802154_hw *hw); * ieee802154_stop_queue - stop ieee802154 queue * @hw: pointer as obtained from ieee802154_alloc_hw(). * + * Tranceivers usually have either one transmit framebuffer or one framebuffer + * for both transmitting and receiving. Hence, the core currently only handles + * one frame at a time for each phy, which means we need to tell upper layers to + * stop giving us new skbs while we are busy with the transmitted one. The queue + * must then be stopped before transmitting. + * * Drivers should use this function instead of netif_stop_queue. */ void ieee802154_stop_queue(struct ieee802154_hw *hw); @@ -486,4 +498,23 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw); void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling); +/** + * ieee802154_xmit_error - offloaded frame transmission failed + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). 
+ * @skb: buffer for transmission + * @reason: error code + */ +void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, + int reason); + +/** + * ieee802154_xmit_hw_error - frame could not be offloaded to the transmitter + * because of a hardware error (bus error, timeout, etc) + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). + * @skb: buffer for transmission + */ +void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb); + #endif /* NET_MAC802154_H */ diff --git a/include/net/macsec.h b/include/net/macsec.h index d6fa6b97f6ef..5b9c61c4d3a6 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -14,12 +14,27 @@ #define MACSEC_DEFAULT_PN_LEN 4 #define MACSEC_XPN_PN_LEN 8 -#define MACSEC_SALT_LEN 12 #define MACSEC_NUM_AN 4 /* 2 bits for the association number */ +#define MACSEC_SCI_LEN 8 +#define MACSEC_PORT_ES (htons(0x0001)) + +#define MACSEC_TCI_VERSION 0x80 +#define MACSEC_TCI_ES 0x40 /* end station */ +#define MACSEC_TCI_SC 0x20 /* SCI present */ +#define MACSEC_TCI_SCB 0x10 /* epon */ +#define MACSEC_TCI_E 0x08 /* encryption */ +#define MACSEC_TCI_C 0x04 /* changed text */ +#define MACSEC_AN_MASK 0x03 /* association number */ +#define MACSEC_TCI_CONFID (MACSEC_TCI_E | MACSEC_TCI_C) + +#define MACSEC_DEFAULT_ICV_LEN 16 + typedef u64 __bitwise sci_t; typedef u32 __bitwise ssci_t; +struct metadata_dst; + typedef union salt { struct { u32 ssci; @@ -183,6 +198,7 @@ struct macsec_tx_sa { * @scb: single copy broadcast flag * @sa: array of secure associations * @stats: stats for this TXSC + * @md_dst: MACsec offload metadata dst */ struct macsec_tx_sc { bool active; @@ -193,6 +209,7 @@ struct macsec_tx_sc { bool scb; struct macsec_tx_sa __rcu *sa[MACSEC_NUM_AN]; struct pcpu_tx_sc_stats __percpu *stats; + struct metadata_dst *md_dst; }; /** @@ -254,8 +271,6 @@ struct macsec_context { struct macsec_rx_sa_stats *rx_sa_stats; struct macsec_dev_stats *dev_stats; } stats; - - u8 prepare:1; }; /** @@ -289,5 +304,12 @@ 
struct macsec_ops { }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); +static inline bool macsec_send_sci(const struct macsec_secy *secy) +{ + const struct macsec_tx_sc *tx_sc = &secy->tx_sc; + + return tx_sc->send_sci || + (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); +} #endif /* _NET_MACSEC_H_ */ diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909..82800d521c3d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -36,15 +36,28 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) -#define MCTP_HEADER_MAXLEN 4 - #define MCTP_INITIAL_DEFAULT_NET 1 -static inline bool mctp_address_ok(mctp_eid_t eid) +static inline bool mctp_address_unicast(mctp_eid_t eid) { return eid >= 8 && eid < 255; } +static inline bool mctp_address_broadcast(mctp_eid_t eid) +{ + return eid == 255; +} + +static inline bool mctp_address_null(mctp_eid_t eid) +{ + return eid == 0; +} + +static inline bool mctp_address_matches(mctp_eid_t match, mctp_eid_t eid) +{ + return match == eid || match == MCTP_ADDR_ANY; +} + static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) { return (struct mctp_hdr *)skb_network_header(skb); @@ -121,7 +134,7 @@ struct mctp_sock { */ struct mctp_sk_key { mctp_eid_t peer_addr; - mctp_eid_t local_addr; + mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ /* we hold a ref to sk when set */ @@ -158,6 +171,12 @@ struct mctp_sk_key { */ unsigned long dev_flow_state; struct mctp_dev *dev; + + /* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire + * automatically on timeout or response, instead SIOCMCTPDROPTAG + * is used. 
+ */ + bool manual_alloc; }; struct mctp_skb_cb { @@ -234,6 +253,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); +struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t daddr, mctp_eid_t saddr, + bool manual, u8 *tagp); /* routing <--> device interface */ unsigned int mctp_default_net(struct net *net); diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 9deb3a3735da..0c71c27979fb 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -6,6 +6,9 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 +#include <linux/types.h> +#include <net/lwtunnel.h> + struct mpls_iptunnel_encap { u8 labels; u8 ttl_propagate; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index a925349b4b89..412479ebf5ad 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -35,9 +35,11 @@ struct mptcp_ext { frozen:1, reset_transient:1; u8 reset_reason:4, - csum_reqd:1; + csum_reqd:1, + infinite_map:1; }; +#define MPTCPOPT_HMAC_LEN 20 #define MPTCP_RM_IDS_MAX 8 struct mptcp_rm_list { @@ -88,7 +90,7 @@ struct mptcp_out_options { u32 nonce; u32 token; u64 thmac; - u8 hmac[20]; + u8 hmac[MPTCPOPT_HMAC_LEN]; }; }; #endif @@ -124,7 +126,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts); bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts); void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info); @@ -217,12 +219,6 @@ static inline bool rsk_drop_req(const struct request_sock *req) return false; } -static inline void mptcp_parse_option(const struct sk_buff *skb, - const unsigned char *ptr, int opsize, - struct tcp_options_received *opt_rx) 
-{ -} - static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts) @@ -289,4 +285,14 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + +#if !IS_ENABLED(CONFIG_MPTCP) +struct mptcp_sock { }; +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/net/mrp.h b/include/net/mrp.h index 1c308c034e1a..92cd3fb6cf9d 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -2,6 +2,10 @@ #ifndef _NET_MRP_H #define _NET_MRP_H +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> + #define MRP_END_MARK 0x0 struct mrp_pdu_hdr { diff --git a/include/net/ncsi.h b/include/net/ncsi.h index fbefe80361ee..08a50d9acb0a 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -2,6 +2,8 @@ #ifndef __NET_NCSI_H #define __NET_NCSI_H +#include <linux/types.h> + /* * The NCSI device states seen from external. More NCSI device states are * only visible internally (in net/ncsi/internal.h). 
When the NCSI device diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 04341d86585d..da7eec8669ec 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -411,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -428,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -459,10 +447,15 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce); +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr); + void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, @@ -487,9 +480,9 @@ int igmp6_late_init(void); void igmp6_cleanup(void); void igmp6_late_cleanup(void); -int igmp6_event_query(struct sk_buff *skb); +void igmp6_event_query(struct sk_buff *skb); -int igmp6_event_report(struct sk_buff *skb); +void igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 38a0c1d24570..20745cf7ae1a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -48,6 +48,7 
@@ enum { NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, + NEIGH_VAR_INTERVAL_PROBE_TIME_MS, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN, @@ -70,6 +71,7 @@ enum { struct neigh_parms { possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; @@ -81,6 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; + int qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; @@ -158,6 +161,7 @@ struct neighbour { struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; + netdevice_tracker dev_tracker; u8 primary_key[0]; } __randomize_layout; @@ -173,6 +177,7 @@ struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; u32 flags; u8 protocol; u8 key[]; @@ -271,11 +276,6 @@ static inline void *neighbour_priv(const struct neighbour *n) extern const struct nla_policy nda_policy[]; -static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) -{ - return *(const u16 *)n->primary_key == *(const u16 *)pkey; -} - static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; @@ -321,6 +321,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } +static inline void neigh_confirm(struct neighbour *n) +{ + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + } +} + void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -336,7 +347,8 @@ static inline struct 
neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -446,17 +458,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { diff --git a/include/net/net_debug.h b/include/net/net_debug.h new file mode 100644 index 000000000000..1e74684cbbdb --- /dev/null +++ b/include/net/net_debug.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NET_DEBUG_H +#define _LINUX_NET_DEBUG_H + +#include <linux/bug.h> +#include <linux/kern_levels.h> + +struct net_device; + +__printf(3, 4) __cold +void netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...); +__printf(2, 3) __cold +void netdev_emerg(const struct net_device *dev, const char *format, ...); +__printf(2, 3) 
__cold +void netdev_alert(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_crit(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_err(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_warn(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_notice(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_info(const struct net_device *dev, const char *format, ...); + +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __section(".data.once") __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_netdev_dbg(__dev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +#define netdev_dbg(__dev, format, args...) 
\ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ +}) +#endif + +#if defined(VERBOSE_DEBUG) +#define netdev_vdbg netdev_dbg +#else + +#define netdev_vdbg(dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* netif printk helpers, similar to netdev_printk */ + +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netif_dbg(priv, type, netdev, format, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + dynamic_netdev_dbg(netdev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netif_dbg(priv, type, dev, format, args...) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) +#else +#define netif_dbg(priv, type, dev, format, args...) 
\ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* if @cond then downgrade to debug, else print at @level */ +#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ + do { \ + if (cond) \ + netif_dbg(priv, type, netdev, fmt, ##args); \ + else \ + netif_ ## level(priv, type, netdev, fmt, ##args); \ + } while (0) + +#if defined(VERBOSE_DEBUG) +#define netif_vdbg netif_dbg +#else +#define netif_vdbg(priv, type, dev, format, args...) \ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + + +#if defined(CONFIG_DEBUG_NET) +#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#else +#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#endif + +#endif /* _LINUX_NET_DEBUG_H */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bb5fa5914032..8c3587d5c308 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -26,6 +26,9 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) +#include <net/netns/flow_table.h> +#endif #include <net/netns/nftables.h> #include <net/netns/xfrm.h> #include <net/netns/mpls.h> @@ -34,6 +37,7 @@ #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> +#include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -62,7 +66,7 @@ struct net { */ spinlock_t rules_mod_lock; - unsigned int dev_unreg_count; + atomic_t dev_unreg_count; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; @@ -87,6 +91,7 @@ struct net { struct idr netns_ids; struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; struct list_head dev_base_head; struct proc_dir_entry *proc_net; @@ -118,7 +123,9 @@ struct net { struct netns_core core; struct netns_mib mib; struct netns_packet packet; +#if 
IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; +#endif struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) @@ -138,6 +145,9 @@ struct net { #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) struct netns_nftables nft; #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + struct netns_ft ft; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; @@ -240,6 +250,7 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); @@ -258,6 +269,7 @@ static inline struct net *maybe_get_net(struct net *net) return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) @@ -308,6 +320,36 @@ static inline int check_net(const struct net *net) #endif +static inline void netns_tracker_alloc(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netns_tracker_free(struct net *net, + netns_tracker *tracker) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(&net->refcnt_tracker, tracker); +#endif +} + +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); + return net; +} + +static inline void put_net_track(struct net *net, netns_tracker *tracker) +{ + netns_tracker_free(net, tracker); + put_net(net); +} + typedef struct { #ifdef CONFIG_NET_NS struct net *net; @@ -479,4 +521,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/net_trackers.h 
b/include/net/net_trackers.h new file mode 100644 index 000000000000..d94c76cf15a9 --- /dev/null +++ b/include/net/net_trackers.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_TRACKERS_H +#define __NET_NET_TRACKERS_H +#include <linux/ref_tracker.h> + +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER +typedef struct ref_tracker *netdevice_tracker; +#else +typedef struct {} netdevice_tracker; +#endif + +#ifdef CONFIG_NET_NS_REFCNT_TRACKER +typedef struct ref_tracker *netns_tracker; +#else +typedef struct {} netns_tracker; +#endif + +#endif /* __NET_NET_TRACKERS_H */ diff --git a/include/net/netevent.h b/include/net/netevent.h index 4107016c3bb4..1be3757a8b7f 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -14,6 +14,7 @@ struct dst_entry; struct neighbour; +struct notifier_block ; struct netevent_redirect { struct dst_entry *old; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d24b0a34c8f0..6a2019aaa464 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -43,12 +43,16 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + spinlock_t dying_lock; + struct hlist_nulls_head dying_list; +}; + struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; unsigned int expect_count; - u8 sysctl_auto_assign_helper; - bool auto_assign_helper_warned; /* only used from work queues, configuration plane, and so on: */ unsigned int users4; @@ -58,8 +62,7 @@ struct nf_conntrack_net { struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct delayed_work ecache_dwork; - struct netns_ct *ct_net; + struct nf_conntrack_net_ecache ecache; #endif }; @@ -76,6 +79,8 @@ struct nf_conn { * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). 
* Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; @@ -94,7 +99,6 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) @@ -169,11 +173,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) return (struct nf_conn *)(nfct & NFCT_PTRMASK); } +void nf_ct_destroy(struct nf_conntrack *nfct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - WARN_ON(!ct); - nf_conntrack_put(&ct->ct_general); + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); } /* Protocol module loading */ @@ -228,13 +234,16 @@ static inline bool nf_ct_kill(struct nf_conn *ct) return nf_ct_delete(ct, 0, 0); } -/* Set all unconfirmed conntrack as dying */ -void nf_ct_unconfirmed_destroy(struct net *); +struct nf_ct_iter_data { + struct net *net; + void *data; + u32 portid; + int report; +}; /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup_net(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), + const struct nf_ct_iter_data *iter_data); /* also set unconfirmed conntracks as dying. Only use in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), @@ -278,7 +287,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; - return timeout > 0 ? 
timeout : 0; + return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 7f44a771530e..4b2b7f8914ea 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -78,7 +78,6 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir, void nf_conntrack_acct_pernet_init(struct net *net); -int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h new file mode 100644 index 000000000000..078d3c52c03f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_ACT_CT_H +#define _NF_CONNTRACK_ACT_CT_H + +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_act_ct_ext { + int ifindex[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); +#else + return NULL; +#endif +} + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); + + if (act_ct) + return act_ct; + + act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + return act_ct; +#else + return NULL; +#endif +} + +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + 
act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +#endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 000000000000..2d0da478c8e0 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include <linux/kconfig.h> +#include <net/netfilter/nf_conntrack.h> + +struct nf_conn___init { + struct nf_conn ct; +}; + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); +extern void cleanup_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +static inline void cleanup_nf_conntrack_bpf(void) +{ +} + +#endif + +#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_nat_bpf(void); + +#else + +static inline int register_nf_nat_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 13807ea94cd2..b2b9de70d9f4 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,9 +58,14 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); - if (likely(ret == NF_ACCEPT)) + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + + if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) nf_ct_deliver_cached_events(ct); } return ret; @@ -79,4 +84,17 @@ void nf_conntrack_lock(spinlock_t *lock); extern 
spinlock_t nf_conntrack_expect_lock; +/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ + +static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) +{ + if (timeout > INT_MAX) + timeout = INT_MAX; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); +} + +int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); +void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); +int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); + #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 9645b47fa7e4..e227d997fc71 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -10,6 +10,7 @@ struct nf_conncount_data; struct nf_conncount_list { spinlock_t list_lock; + u32 last_gc; /* jiffies at most recent gc */ struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ }; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index d932e22edcb4..0c1dac318e02 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,17 +14,15 @@ #include <net/netfilter/nf_conntrack_extend.h> enum nf_ct_ecache_state { - NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - enum nf_ct_ecache_state state:8;/* ecache state */ + u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; @@ -38,28 +36,12 @@ nf_ct_ecache_find(const struct nf_conn *ct) 
#endif } -static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *e; - - if (!ctmask && !expmask && net->ct.sysctl_events) { - ctmask = ~0; - expmask = ~0; - } - if (!ctmask && !expmask) - return NULL; - - e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); - if (e) { - e->ctmask = ctmask; - e->expmask = expmask; - } - return e; + return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); #else - return NULL; + return false; #endif } @@ -91,6 +73,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); +bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); #else static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) @@ -105,6 +88,10 @@ static inline int nf_conntrack_eventmask_report(unsigned int eventmask, return 0; } +static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +{ + return false; +} #endif static inline void @@ -130,30 +117,20 @@ nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return nf_conntrack_eventmask_report(1 << event, ct, portid, report); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); #endif + return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return 
nf_conntrack_eventmask_report(1 << event, ct, 0, 0); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); #endif + return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -166,8 +143,7 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); -int nf_conntrack_ecache_init(void); -void nf_conntrack_ecache_fini(void); +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { @@ -194,16 +170,6 @@ static inline void nf_conntrack_ecache_pernet_init(struct net *net) static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { } - -static inline int nf_conntrack_ecache_init(void) -{ - return 0; -} - -static inline void nf_conntrack_ecache_fini(void) -{ -} - static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e1e588387103..0b247248b032 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -28,24 +28,18 @@ enum nf_ct_ext_id { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) NF_CT_EXT_SYNPROXY, #endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + NF_CT_EXT_ACT_CT, +#endif NF_CT_EXT_NUM, }; -#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help -#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat -#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj -#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct -#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp -#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout -#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels -#define 
NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy - /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - char data[]; + unsigned int gen_id; + char data[] __aligned(8); }; static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) @@ -58,33 +52,28 @@ static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); } -static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) +void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); + +static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) { - if (!nf_ct_ext_exist(ct, id)) + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, id)) return NULL; + if (unlikely(ext->gen_id)) + return __nf_ct_ext_find(ext, id); + return (void *)ct->ext + ct->ext->offset[id]; } -#define nf_ct_ext_find(ext, id) \ - ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) - -/* Destroy all relationships */ -void nf_ct_ext_destroy(struct nf_conn *ct); /* Add this type, returns pointer to data or NULL. */ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); -struct nf_ct_ext_type { - /* Destroys relationships (can be NULL). */ - void (*destroy)(struct nf_conn *ct); - - enum nf_ct_ext_id id; - - /* Length and min alignment. */ - u8 len; - u8 align; -}; +/* ext genid. if ext->id != ext_genid, extensions cannot be used + * anymore unless conntrack has CONFIRMED bit set. 
+ */ +extern atomic_t nf_conntrack_ext_genid; +void nf_ct_ext_bump_genid(void); -int nf_ct_extend_register(const struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 37f0fbefb060..9939c366f720 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum); void nf_nat_helper_put(struct nf_conntrack_helper *helper); +void nf_ct_set_auto_assign_helper_warned(struct net *net); #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index ba916411c4e1..66bab6c60d12 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -17,10 +17,18 @@ struct nf_conn_labels { unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; +/* Can't use nf_ct_ext_find(), flow dissector cannot use symbols + * exported by nf_conntrack module. 
+ */ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS]; #else return NULL; #endif @@ -45,12 +53,9 @@ int nf_connlabels_replace(struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); -void nf_conntrack_labels_fini(void); int nf_connlabels_get(struct net *net, unsigned int bit); void nf_connlabels_put(struct net *net); #else -static inline int nf_conntrack_labels_init(void) { return 0; } -static inline void nf_conntrack_labels_fini(void) {} static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netfilter/nf_conntrack_seqadj.h b/include/net/netfilter/nf_conntrack_seqadj.h index 0a10b50537ae..883c414b768e 100644 --- a/include/net/netfilter/nf_conntrack_seqadj.h +++ b/include/net/netfilter/nf_conntrack_seqadj.h @@ -42,7 +42,4 @@ int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff); s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq); -int nf_conntrack_seqadj_init(void); -void nf_conntrack_seqadj_fini(void); - #endif /* _NF_CONNTRACK_SEQADJ_H */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 659b0ea25b4d..9fdaba911de6 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -17,14 +17,6 @@ struct nf_ct_timeout { char data[]; }; -struct ctnl_timeout { - struct list_head head; - struct rcu_head rcu_head; - refcount_t refcnt; - char name[CTNL_TIMEOUT_NAME_MAX]; - struct nf_ct_timeout timeout; -}; - struct nf_conn_timeout { struct 
nf_ct_timeout __rcu *timeout; }; @@ -89,23 +81,11 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -int nf_conntrack_timeout_init(void); -void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name); void nf_ct_destroy_timeout(struct nf_conn *ct); #else -static inline int nf_conntrack_timeout_init(void) -{ - return 0; -} - -static inline void nf_conntrack_timeout_fini(void) -{ - return; -} - static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) @@ -120,8 +100,12 @@ static inline void nf_ct_destroy_timeout(struct nf_conn *ct) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); -extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout); +struct nf_ct_timeout_hooks { + struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name); + void (*timeout_put)(struct nf_ct_timeout *timeout); +}; + +extern const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook; #endif #endif /* _NF_CONNTRACK_TIMEOUT_H */ diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 820ea34b6029..57138d974a9f 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -40,21 +40,8 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP void nf_conntrack_tstamp_pernet_init(struct net *net); - -int nf_conntrack_tstamp_init(void); -void nf_conntrack_tstamp_fini(void); #else static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} - -static inline int nf_conntrack_tstamp_init(void) -{ - return 
0; -} - -static inline void nf_conntrack_tstamp_fini(void) -{ - return; -} #endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */ #endif /* _NF_CONNTRACK_TSTAMP_H */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index a3647fadf1cc..cd982f4a0f50 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -10,6 +10,8 @@ #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/flow_offload.h> #include <net/dst.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> struct nf_flowtable; struct nf_flow_rule; @@ -96,6 +98,7 @@ enum flow_offload_xmit_type { FLOW_OFFLOAD_XMIT_NEIGH, FLOW_OFFLOAD_XMIT_XFRM, FLOW_OFFLOAD_XMIT_DIRECT, + FLOW_OFFLOAD_XMIT_TC, }; #define NF_FLOW_TABLE_ENCAP_MAX 2 @@ -127,7 +130,7 @@ struct flow_offload_tuple { struct { } __hash; u8 dir:2, - xmit_type:2, + xmit_type:3, encap_num:2, in_vlan_ingress:2; u16 mtu; @@ -142,6 +145,9 @@ struct flow_offload_tuple { u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; } out; + struct { + u32 iifidx; + } tc; }; }; @@ -264,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); +void nf_flow_table_gc_run(struct nf_flowtable *flow_table); void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, struct net_device *dev); void nf_flow_table_cleanup(struct net_device *dev); @@ -300,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, struct flow_offload *flow); void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd); @@ -313,4 +322,41 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, int nf_flow_table_offload_init(void); void 
nf_flow_table_offload_exit(void); +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +{ + __be16 proto; + + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); + switch (proto) { + case htons(PPP_IP): + return htons(ETH_P_IP); + case htons(PPP_IPV6): + return htons(ETH_P_IPV6); + } + + return 0; +} + +#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ + this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \ + this_cpu_dec((net)->ft.stat->count) + +#ifdef CONFIG_NF_FLOW_TABLE_PROCFS +int nf_flow_table_init_proc(struct net *net); +void nf_flow_table_fini_proc(struct net *net); +#else +static inline int nf_flow_table_init_proc(struct net *net) +{ + return 0; +} + +static inline void nf_flow_table_fini_proc(struct net *net) +{ +} +#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */ + #endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 987111ae5240..e9eb01e99d2f 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -104,7 +104,7 @@ unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); -static inline int nf_nat_initialized(struct nf_conn *ct, +static inline int nf_nat_initialized(const struct nf_conn *ct, enum nf_nat_manip_type manip) { if (manip == NF_NAT_MANIP_SRC) diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index efae84646353..44c421b9be85 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -38,4 +38,5 @@ bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, * to port ct->master->saved_proto. 
*/ void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); +u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port); #endif diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9eed51e920e8..c81021ab07aa 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -37,13 +37,13 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) { while (*jhash_initval == 0) - *jhash_initval = prandom_u32(); + *jhash_initval = get_random_u32(); } static inline u32 hash_v4(const struct iphdr *iph, u32 initval) diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h index 9051c3a0c8e7..7c669792fb9c 100644 --- a/include/net/netfilter/nf_reject.h +++ b/include/net/netfilter/nf_reject.h @@ -5,12 +5,28 @@ #include <linux/types.h> #include <uapi/linux/in.h> -static inline bool nf_reject_verify_csum(__u8 proto) +static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, + __u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum * of the entire payload. */ switch (proto) { + /* Protocols with optional checksums. */ + case IPPROTO_UDP: { + const struct udphdr *udp_hdr; + struct udphdr _udp_hdr; + + udp_hdr = skb_header_pointer(skb, dataoff, + sizeof(_udp_hdr), + &_udp_hdr); + if (!udp_hdr || udp_hdr->check) + return true; + + return false; + } + case IPPROTO_GRE: + /* Protocols with other integrity checks. */ case IPPROTO_AH: case IPPROTO_ESP: @@ -19,9 +35,6 @@ static inline bool nf_reject_verify_csum(__u8 proto) /* Protocols with partial checksums. 
*/ case IPPROTO_UDPLITE: case IPPROTO_DCCP: - - /* Protocols with optional checksums. */ - case IPPROTO_GRE: return false; } return true; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a0d9e0b47ab8..cdb7db9b0e25 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -105,6 +105,8 @@ struct nft_data { }; } __attribute__((aligned(__alignof__(u64)))); +#define NFT_REG32_NUM 20 + /** * struct nft_regs - nf_tables register set * @@ -115,11 +117,22 @@ struct nft_data { */ struct nft_regs { union { - u32 data[20]; + u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + u8 num_reg; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -144,11 +157,26 @@ static inline void nft_reg_store16(u32 *dreg, u16 val) *(u16 *)dreg = val; } +static inline void nft_reg_store_be16(u32 *dreg, __be16 val) +{ + nft_reg_store16(dreg, (__force __u16)val); +} + static inline u16 nft_reg_load16(const u32 *sreg) { return *(u16 *)sreg; } +static inline __be16 nft_reg_load_be16(const u32 *sreg) +{ + return (__force __be16)nft_reg_load16(sreg); +} + +static inline __be32 nft_reg_load_be32(const u32 *sreg) +{ + return *(__force __be32 *)sreg; +} + static inline void nft_reg_store64(u32 *dreg, u64 val) { put_unaligned(val, (u64 *)dreg); @@ -193,13 +221,18 @@ struct nft_ctx { bool report; }; +enum nft_data_desc_flags { + NFT_DATA_DESC_SETELEM = (1 << 0), +}; + struct nft_data_desc { enum nft_data_types type; + unsigned int size; unsigned int len; + unsigned int flags; }; -int nft_data_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, 
struct nft_data_desc *desc, const struct nlattr *nla); void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); @@ -346,6 +379,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); struct nft_set_ext; @@ -621,6 +656,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[]; struct nft_set_ext_tmpl { u16 len; u8 offset[NFT_SET_EXT_NUM]; + u8 ext_len[NFT_SET_EXT_NUM]; }; /** @@ -642,18 +678,23 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; - tmpl->len += nft_set_ext_types[id].len + len; + tmpl->ext_len[id] = nft_set_ext_types[id].len + len; + tmpl->len += tmpl->ext_len[id]; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, @@ -884,14 +925,16 @@ struct nft_expr_ops { int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int 
(*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); + bool (*offload_action)(const struct nft_expr *expr); void (*offload_stats)(struct nft_expr *expr, const struct flow_stats *stats); - u32 offload_flags; const struct nft_expr_type *type; void *data; }; @@ -974,6 +1017,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, #define NFT_CHAIN_POLICY_UNSET U8_MAX +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + /** * struct nft_chain - nf_tables chain * @@ -987,8 +1044,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, * @name: name of the chain */ struct nft_chain { - struct nft_rule *__rcu *rules_gen_0; - struct nft_rule *__rcu *rules_gen_1; + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; @@ -1003,7 +1060,7 @@ struct nft_chain { u8 *udata; /* Only used during control plane commit phase: */ - struct nft_rule **rules_next; + struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); @@ -1059,7 +1116,6 @@ struct nft_stats { struct nft_hook { struct list_head list; - bool inactive; struct nf_hook_ops ops; struct rcu_head rcu; }; @@ -1308,24 +1364,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * 
@pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; - const struct nft_rule *rule; + const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, @@ -1592,6 +1652,7 @@ struct nftables_pernet { struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; + u64 table_handle; unsigned int base_seq; u8 validate_state; }; @@ -1603,4 +1664,25 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +#define __NFT_REDUCE_READONLY 1UL +#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY + +static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) +{ + return expr->ops->reduce == NFT_REDUCE_READONLY; +} + +void nft_reg_track_update(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg, u8 len); +void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); +void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); + +static inline bool nft_reg_track_cmp(struct nft_regs_track *track, + const struct nft_expr *expr, u8 dreg) +{ + return track->regs[dreg].selector && + track->regs[dreg].selector->ops == expr->ops && + track->regs[dreg].num_reg == 0; +} + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h 
index 0fa5a6d98a00..1223af68cd9a 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,7 @@ extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_bitwise_type; extern struct nft_expr_type nft_byteorder_type; @@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; #endif +extern struct nft_object_type nft_counter_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -40,23 +42,22 @@ struct nft_cmp_fast_expr { bool inv; }; +struct nft_cmp16_fast_expr { + struct nft_data data; + struct nft_data mask; + u8 sreg; + u8 len; + bool inv; +}; + struct nft_immediate_expr { struct nft_data data; u8 dreg; u8 dlen; }; -/* Calculate the mask for the nft_cmp_fast expression. On big endian the - * mask needs to include the *upper* bytes when interpreting that data as - * something smaller than the full u32, therefore a cpu_to_le32 is done. 
- */ -static inline u32 nft_cmp_fast_mask(unsigned int len) -{ - return cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr, - data) * BITS_PER_BYTE - len)); -} - extern const struct nft_expr_ops nft_cmp_fast_ops; +extern const struct nft_expr_ops nft_cmp16_fast_ops; struct nft_payload { enum nft_payload_bases base:8; @@ -120,6 +121,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); +void nft_counter_init_seqcount(void); + struct nft_expr; struct nft_regs; struct nft_pktinfo; @@ -143,4 +146,6 @@ void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index f9d95ff82df8..3568b6a2f5f0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,8 +67,6 @@ struct nft_flow_rule { struct flow_rule *rule; }; -#define NFT_OFFLOAD_F_ACTION (1 << 0) - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); @@ -94,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net); NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); -int nft_chain_offload_priority(struct nft_base_chain *basechain); +bool nft_chain_offload_support(const struct nft_base_chain *basechain); int nft_offload_init(void); void nft_offload_exit(void); diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 237f3757637e..eed099eae672 100644 --- a/include/net/netfilter/nft_fib.h +++ 
b/include/net/netfilter/nft_fib.h @@ -37,4 +37,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev); + +bool nft_fib_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 2dce55c736f4..9b51cc67de54 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -6,6 +6,7 @@ struct nft_meta { enum nft_meta_keys key:8; + u8 len; union { u8 dreg; u8 sreg; @@ -43,4 +44,6 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); +bool nft_meta_get_reduce(struct nft_regs_track *track, + const struct nft_expr *expr); #endif diff --git a/include/net/netlink.h b/include/net/netlink.h index 7a2a9d3144ba..6bfa972f2fbf 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -181,6 +181,8 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, + NLA_BE16, + NLA_BE32, __NLA_TYPE_MAX, }; @@ -231,6 +233,7 @@ enum nla_policy_validation { * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, * NLA_S32, NLA_S64, + * NLA_BE16, NLA_BE32, * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" @@ -261,6 +264,8 @@ enum nla_policy_validation { * NLA_U16, * NLA_U32, * NLA_U64, + * NLA_BE16, + * NLA_BE32, * NLA_S8, * NLA_S16, * NLA_S32, @@ -317,18 +322,10 @@ struct nla_policy { u8 validation_type; u16 len; union { - const u32 bitfield32_valid; - const u32 mask; - const char *reject_message; - const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; - struct { - s16 min, max; - }; - int (*validate)(const struct nlattr *attr, - struct netlink_ext_ack *extack); - /* This entry is special, and used for the attribute at index 0 + /** + * 
@strict_start_type: first attribute to validate strictly + * + * This entry is special, and used for the attribute at index 0 * only, and specifies special data about the policy, namely it * specifies the "boundary type" where strict length validation * starts for any attribute types >= this value, also, strict @@ -347,6 +344,19 @@ struct nla_policy { * was added to enforce strict validation from thereon. */ u16 strict_start_type; + + /* private: use NLA_POLICY_*() to set */ + const u32 bitfield32_valid; + const u32 mask; + const char *reject_message; + const struct nla_policy *nested_policy; + struct netlink_range_validation *range; + struct netlink_range_validation_signed *range_signed; + struct { + s16 min, max; + }; + int (*validate)(const struct nlattr *attr, + struct netlink_ext_ack *extack); }; }; @@ -368,6 +378,8 @@ struct nla_policy { (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64) #define __NLA_IS_SINT_TYPE(tp) \ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) +#define __NLA_IS_BEINT_TYPE(tp) \ + (tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -381,6 +393,7 @@ struct nla_policy { #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ __NLA_IS_SINT_TYPE(tp) || \ + __NLA_IS_BEINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ @@ -388,6 +401,8 @@ struct nla_policy { tp != NLA_REJECT && \ tp != NLA_NESTED && \ tp != NLA_NESTED_ARRAY) + tp) +#define NLA_ENSURE_BEINT_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ @@ -741,6 +756,7 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * 
@policy: validation policy * @extack: extended ACK report struct * * See nla_parse() @@ -760,6 +776,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated() @@ -779,6 +796,7 @@ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected + * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated_strict() @@ -814,7 +832,6 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy - * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index 0ca6a1b87185..2c01a278d1eb 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -6,11 +6,18 @@ #ifndef __NETNS_BPF_H__ #define __NETNS_BPF_H__ -#include <linux/bpf-netns.h> +#include <linux/list.h> struct bpf_prog; struct bpf_prog_array; +enum netns_bpf_attach_type { + NETNS_BPF_INVALID = -1, + NETNS_BPF_FLOW_DISSECTOR = 0, + NETNS_BPF_SK_LOOKUP, + MAX_NETNS_BPF_ATTACH_TYPE +}; + struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; diff --git a/include/net/netns/can.h b/include/net/netns/can.h index 52fbd8291a96..48b79f7e6236 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -7,6 +7,7 @@ #define __NETNS_CAN_H__ #include <linux/spinlock.h> +#include <linux/timer.h> 
struct can_dev_rcv_lists; struct can_pkg_stats; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0294f3d473af..e1290c159184 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -93,24 +93,17 @@ struct nf_ip_net { #endif }; -struct ct_pcpu { - spinlock_t lock; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; -}; - struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS + u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ u8 sysctl_events; u8 sysctl_acct; - u8 sysctl_auto_assign_helper; u8 sysctl_tstamp; u8 sysctl_checksum; - struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 36c2d998a43c..8249060cf5d0 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -2,6 +2,8 @@ #ifndef __NETNS_CORE_H__ #define __NETNS_CORE_H__ +#include <linux/types.h> + struct ctl_table_header; struct prot_inuse; @@ -10,9 +12,9 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS - int __percpu *sock_inuse; struct prot_inuse __percpu *prot_inuse; #endif }; diff --git a/include/net/netns/flow_table.h b/include/net/netns/flow_table.h new file mode 100644 index 000000000000..1c5fc657e267 --- /dev/null +++ b/include/net/netns/flow_table.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETNS_FLOW_TABLE_H +#define __NETNS_FLOW_TABLE_H + +struct nf_flow_table_stat { + unsigned int count_wq_add; + unsigned int count_wq_del; + unsigned int count_wq_stats; +}; + +struct netns_ft { + struct nf_flow_table_stat __percpu *stat; +}; +#endif diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 8a1ab47c3fb3..00c399edeed1 100644 --- 
a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -8,6 +8,7 @@ #include <linux/bug.h> #include <linux/rcupdate.h> +#include <net/net_namespace.h> /* * Generic net pointers are to be used by modules to put some private @@ -32,7 +33,7 @@ struct net_generic { struct rcu_head rcu; } s; - void *ptr[0]; + DECLARE_FLEX_ARRAY(void *, ptr); }; }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6c5b2efc4f17..1b8004679445 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/seqlock.h> #include <linux/siphash.h> struct ctl_table_header; @@ -31,18 +32,17 @@ struct ping_group_range { struct inet_hashinfo; struct inet_timewait_death_row { - atomic_t tw_count; - char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)]; + refcount_t tw_refcount; - struct inet_hashinfo *hashinfo; + /* Padding to avoid false sharing, tw_refcount can be often written */ + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_max_tw_buckets; }; struct tcp_fastopen_context; struct netns_ipv4 { - /* Please keep tcp_death_row at first field in netns_ipv4 */ - struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp; + struct inet_timewait_death_row tcp_death_row; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -70,11 +70,9 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock * __percpu *icmp_sk; struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; struct fqdir *fqdir; u8 sysctl_icmp_echo_ignore_all; @@ -85,6 +83,10 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + u32 ip_rt_min_pmtu; + int ip_rt_mtu_expires; + int ip_rt_min_advmss; + struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; @@ -127,6 +129,7 @@ struct netns_ipv4 { u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; u8 
sysctl_tcp_migrate_req; + u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; @@ -160,14 +163,15 @@ struct netns_ipv4 { int sysctl_tcp_challenge_ack_limit; int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; + u8 sysctl_tcp_tso_rtt_log; u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; - u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_wmem[3]; int sysctl_tcp_rmem[3]; + unsigned int sysctl_tcp_child_ehash_entries; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; int sysctl_max_syn_backlog; @@ -177,6 +181,8 @@ struct netns_ipv4 { unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + u32 tcp_challenge_timestamp; + u32 tcp_challenge_count; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index a4b550380316..b4af4837d80b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -75,11 +75,12 @@ struct netns_ipv6 { struct list_head fib6_walkers; rwlock_t fib6_walker_lock; spinlock_t fib6_gc_lock; - unsigned int ip6_rt_gc_expire; - unsigned long ip6_rt_last_gc; + atomic_t ip6_rt_gc_expire; + unsigned long ip6_rt_last_gc; + unsigned char flowlabel_has_excl; #ifdef CONFIG_IPV6_MULTIPLE_TABLES - unsigned int fib6_rules_require_fldissect; bool fib6_has_custom_rules; + unsigned int fib6_rules_require_fldissect; #ifdef CONFIG_IPV6_SUBTREES unsigned int fib6_routes_require_src; #endif @@ -88,11 +89,15 @@ struct netns_ipv6 { struct fib6_table *fib6_local_tbl; struct fib_rules_ops *fib6_rules_ops; #endif - struct sock * __percpu *icmp_sk; struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; struct sock *mc_autojoin_sk; + + struct hlist_head *inet6_addr_lst; + spinlock_t addrconf_hash_lock; + struct delayed_work addr_chk_work; 
+ #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr_table *mrt6; diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index acedef12a35e..1db8f9aaddb4 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -6,6 +6,7 @@ #ifndef __NETNS_MCTP_H__ #define __NETNS_MCTP_H__ +#include <linux/mutex.h> #include <linux/types.h> struct netns_mctp { diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index a7bdcfbb0b28..19ad2574b267 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -6,6 +6,8 @@ #ifndef __NETNS_MPLS_H__ #define __NETNS_MPLS_H__ +#include <linux/types.h> + struct mpls_route; struct ctl_table_header; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index b593f95e9991..02bbdc577f8e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -24,9 +24,6 @@ struct netns_nf { #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; #endif -#if IS_ENABLED(CONFIG_DECNET) - struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; -#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) unsigned int defrag_ipv4_users; #endif diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h index 1849e77eb68a..434239b37014 100644 --- a/include/net/netns/nexthop.h +++ b/include/net/netns/nexthop.h @@ -6,6 +6,7 @@ #ifndef __NETNS_NEXTHOP_H__ #define __NETNS_NEXTHOP_H__ +#include <linux/notifier.h> #include <linux/rbtree.h> struct netns_nexthop { diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 40240722cdca..a681147aecd8 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -2,6 +2,9 @@ #ifndef __NETNS_SCTP_H__ #define __NETNS_SCTP_H__ +#include <linux/timer.h> +#include <net/snmp.h> + struct sock; struct proc_dir_entry; struct sctp_mib; diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index ea8a9cf2619b..582212ada3ba 100644 --- 
a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -12,5 +12,15 @@ struct netns_smc { /* protect fback_rsn */ struct mutex mutex_fback_rsn; struct smc_stats_rsn *fback_rsn; + + bool limit_smc_hs; /* constraint on handshake */ +#ifdef CONFIG_SYSCTL + struct ctl_table_header *smc_hdr; +#endif + unsigned int sysctl_autocorking_size; + unsigned int sysctl_smcr_buf_type; + int sysctl_smcr_testlink_time; + int sysctl_wmem; + int sysctl_rmem; }; #endif diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 91a3d7e39198..9859d134d5a8 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,8 +5,16 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +#include <linux/spinlock.h> + +struct unix_table { + spinlock_t *locks; + struct hlist_head *buckets; +}; + struct ctl_table_header; struct netns_unix { + struct unix_table table; int sysctl_max_dgram_qlen; struct ctl_table_header *ctl; }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 947733a639a6..bd7c3be4af5d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -66,11 +66,7 @@ struct netns_xfrm { int sysctl_larval_drop; u32 sysctl_acq_expires; - u8 policy_default; -#define XFRM_POL_DEFAULT_IN 1 -#define XFRM_POL_DEFAULT_OUT 2 -#define XFRM_POL_DEFAULT_FWD 4 -#define XFRM_POL_DEFAULT_MASK 7 + u8 policy_default[XFRM_POLICY_MAX]; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; diff --git a/include/net/netrom.h b/include/net/netrom.h index 80f15b1c1a48..f0565a5987d1 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -14,6 +14,7 @@ #include <net/sock.h> #include <linux/refcount.h> #include <linux/seq_file.h> +#include <net/ax25.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 145acb8f2509..f5850b569c52 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -58,9 +58,6 @@ enum nl802154_commands { NL802154_CMD_SET_WPAN_PHY_NETNS, - /* add 
new commands above here */ - -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL NL802154_CMD_SET_SEC_PARAMS, NL802154_CMD_GET_SEC_KEY, /* can dump */ NL802154_CMD_NEW_SEC_KEY, @@ -74,7 +71,8 @@ enum nl802154_commands { NL802154_CMD_GET_SEC_LEVEL, /* can dump */ NL802154_CMD_NEW_SEC_LEVEL, NL802154_CMD_DEL_SEC_LEVEL, -#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ __NL802154_CMD_AFTER_LAST, diff --git a/include/net/p8022.h b/include/net/p8022.h index c2bacc66bfbc..b690ffcad66b 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_P8022_H #define _NET_P8022_H + +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, diff --git a/include/net/page_pool.h b/include/net/page_pool.h index a4082406a003..813c93499f20 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -80,8 +80,73 @@ struct page_pool_params { enum dma_data_direction dma_dir; /* DMA mapping direction */ unsigned int max_len; /* max DMA sync memory size */ unsigned int offset; /* DMA addr offset */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. 
*/ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#else + +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} + +#endif + struct page_pool { struct page_pool_params p; @@ -95,6 +160,12 @@ struct page_pool { struct page *frag_page; long frag_users; +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif + u32 xdp_mem_id; + /* * Data structure for allocation side * @@ -123,6 +194,10 @@ struct page_pool { */ struct ptr_ring ring; +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif atomic_t pages_state_release_cnt; /* A page_pool is strictly tied to a single RX-queue being @@ -168,9 +243,12 @@ bool 
page_pool_return_skb_page(struct page *page); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct xdp_mem_info; + #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); void page_pool_release_page(struct page_pool *pool, struct page *page); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); @@ -180,7 +258,8 @@ static inline void page_pool_destroy(struct page_pool *pool) } static inline void page_pool_use_xdp_mem(struct page_pool *pool, - void (*disconnect)(void *)) + void (*disconnect)(void *), + struct xdp_mem_info *mem) { } static inline void page_pool_release_page(struct page_pool *pool, @@ -194,21 +273,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct); +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); -/* Same as above but will try to sync the entire area pool->max_len */ -static inline void page_pool_put_full_page(struct page_pool *pool, - struct page *page, bool allow_direct) +static inline void page_pool_fragment_page(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_defrag_page(struct page *page, long nr) +{ + long ret; + + /* If nr == pp_frag_count then we have cleared all remaining + * references to the page. No need to actually overwrite it, instead + * we can leave this to be overwritten by the calling function. 
+ * + * The main advantage to doing this is that an atomic_read is + * generally a much cheaper operation than an atomic update, + * especially when dealing with a page that may be partitioned + * into only 2 or 3 pieces. + */ + if (atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; +} + +static inline bool page_pool_is_last_frag(struct page_pool *pool, + struct page *page) +{ + /* If fragments aren't enabled or count is 0 we were the last user */ + return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + (page_pool_defrag_page(page, 1) == 0); +} + +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, + unsigned int dma_sync_size, + bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - page_pool_put_page(pool, page, -1, allow_direct); + if (!page_pool_is_last_frag(pool, page)) + return; + + page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); #endif } +/* Same as above but will try to sync the entire area pool->max_len */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + /* Same as above but the caller must guarantee safe context. 
e.g NAPI */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) @@ -236,30 +361,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } -static inline void page_pool_set_frag_count(struct page *page, long nr) -{ - atomic_long_set(&page->pp_frag_count, nr); -} - -static inline long page_pool_atomic_sub_frag_count_return(struct page *page, - long nr) -{ - long ret; - - /* As suggested by Alexander, atomic_long_read() may cover up the - * reference count errors, so avoid calling atomic_long_read() in - * the cases of freeing or draining the page_frags, where we would - * not expect it to match or that are slowpath anyway. - */ - if (__builtin_constant_p(nr) && - atomic_long_read(&page->pp_frag_count) == nr) - return 0; - - ret = atomic_long_sub_return(nr, &page->pp_frag_count); - WARN_ON(ret < 0); - return ret; -} - static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 27b1ab5e4e6d..645dddf5ce77 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -10,6 +10,9 @@ #ifndef NET_PHONET_PEP_H #define NET_PHONET_PEP_H +#include <linux/skbuff.h> +#include <net/phonet/phonet.h> + struct pep_sock { struct pn_sock pn_sk; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index a27bdc6cfeab..862f1719b523 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -10,6 +10,10 @@ #ifndef AF_PHONET_H #define AF_PHONET_H +#include <linux/phonet.h> +#include <linux/skbuff.h> +#include <net/sock.h> + /* * The lower layers may not require more space, ever. Make sure it's * enough. 
diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 05b49d4d2b11..e9dc8dca5817 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -10,6 +10,11 @@ #ifndef PN_DEV_H #define PN_DEV_H +#include <linux/list.h> +#include <linux/mutex.h> + +struct net; + struct phonet_device_list { struct list_head list; struct mutex lock; diff --git a/include/net/ping.h b/include/net/ping.h index 2fe78874318c..e4ff3911cbf5 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -bool ping_rcv(struct sk_buff *skb); +enum skb_drop_reason ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 193f88ebf629..4cabb32a2ad9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -23,7 +23,7 @@ struct tcf_walker { }; int register_tcf_proto_ops(struct tcf_proto_ops *ops); -int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +void unregister_tcf_proto_ops(struct tcf_proto_ops *ops); struct tcf_block_ext_info { enum flow_block_binder_type binder_type; @@ -81,6 +81,19 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +static inline bool tc_cls_stats_dump(struct tcf_proto *tp, + struct tcf_walker *arg, + void *filter) +{ + if (arg->count >= arg->skip && arg->fn(tp, filter, arg) < 0) { + arg->stop = 1; + return false; + } + + 
arg->count++; + return true; +} + #else static inline bool tcf_block_shared(struct tcf_block *block) { @@ -197,12 +210,25 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) __tcf_unbind_filter(q, r); } +static inline void tc_cls_bind_class(u32 classid, unsigned long cl, + void *q, struct tcf_result *res, + unsigned long base) +{ + if (res->classid == classid) { + if (cl) + __tcf_bind_filter(q, res, base); + else + __tcf_unbind_filter(q, res); + } +} + struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -217,6 +243,9 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). 
+ */ exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); @@ -236,6 +265,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT exts->net = maybe_get_net(exts->net); + if (exts->net) + netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL); return exts->net != NULL; #else return true; @@ -246,7 +277,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT if (exts->net) - put_net(exts->net); + put_net_track(exts->net, &exts->ns_tracker); #endif } @@ -258,26 +289,31 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (; 0; (void)(i), (void)(a), (void)(exts)) #endif +#define tcf_act_for_each_action(i, a, actions) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) + static inline void -tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) +tcf_exts_hw_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 drops, u64 lastuse, + u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT int i; - preempt_disable(); - for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; - } + /* if stats from hw, just skip */ + if (tcf_action_update_hw_stats(a)) { + preempt_disable(); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); + preempt_enable(); - preempt_enable(); + a->used_hw_stats = used_hw_stats; + a->used_hw_stats_valid = used_hw_stats_valid; + } + } #endif } @@ -321,6 +357,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); +int tcf_exts_validate_ex(struct net *net, struct 
tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -532,9 +571,13 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return ifindex == skb->skb_iif; } -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); -void tc_cleanup_flow_action(struct flow_action *flow_action); +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts, + struct netlink_ext_ack *extack); +void tc_cleanup_offload_action(struct flow_action *flow_action); +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[], + struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); @@ -1012,4 +1055,15 @@ struct tc_fifo_qopt_offload { }; }; +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc); +void tc_skb_ext_tc_enable(void); +void tc_skb_ext_tc_disable(void); +#define tc_skb_ext_tc_enabled() static_branch_unlikely(&tc_skb_ext_tc) +#else /* CONFIG_NET_CLS_ACT */ +static inline void tc_skb_ext_tc_enable(void) { } +static inline void tc_skb_ext_tc_disable(void) { } +#define tc_skb_ext_tc_enabled() false +#endif + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af..38207873eda6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void) return PSCHED_NS2TICKS(ktime_get_ns()); } -static inline psched_tdiff_t -psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) -{ - return min(tv1 - tv2, bound); -} - struct qdisc_watchdog { u64 last_expires; struct hrtimer timer; @@ -106,7 +100,7 @@ struct Qdisc 
*fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, struct netlink_ext_ack *extack); int register_qdisc(struct Qdisc_ops *qops); -int unregister_qdisc(struct Qdisc_ops *qops); +void unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); @@ -147,6 +141,11 @@ static inline struct net *qdisc_net(struct Qdisc *q) return dev_net(q->dev_queue->dev); } +struct tc_query_caps_base { + enum tc_setup_type type; + void *caps; +}; + struct tc_cbs_qopt_offload { u8 enable; s32 queue; @@ -161,6 +160,10 @@ struct tc_etf_qopt_offload { s32 queue; }; +struct tc_taprio_caps { + bool supports_queue_max_sdu:1; +}; + struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ @@ -174,16 +177,34 @@ struct tc_taprio_qopt_offload { ktime_t base_time; u64 cycle_time; u64 cycle_time_extension; + u32 max_sdu[TC_MAX_QUEUE]; size_t num_entries; struct tc_taprio_sched_entry entries[]; }; +#if IS_ENABLED(CONFIG_NET_SCH_TAPRIO) + /* Reference counting */ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload *offload); void taprio_offload_free(struct tc_taprio_qopt_offload *offload); +#else + +/* Reference counting */ +static inline struct tc_taprio_qopt_offload * +taprio_offload_get(struct tc_taprio_qopt_offload *offload) +{ + return NULL; +} + +static inline void taprio_offload_free(struct tc_taprio_qopt_offload *offload) +{ +} + +#endif + /* Ensure skb_mstamp_ns, which might have been populated with the txtime, is * not mistaken for a software timestamp, because this will otherwise prevent * the dispatch of hardware timestamps to the socket. 
@@ -193,4 +214,35 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; + u16 zone; /* Only valid if post_ct = true */ +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + +static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, + unsigned long cl, + struct qdisc_walker *arg) +{ + if (arg->count >= arg->skip && arg->fn(sch, cl, arg) < 0) { + arg->stop = 1; + return false; + } + + arg->count++; + return true; +} + #endif diff --git a/include/net/pptp.h b/include/net/pptp.h index 383e25ca53a7..e63176bdd4c8 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -2,6 +2,9 @@ #ifndef _NET_PPTP_H #define _NET_PPTP_H +#include <linux/types.h> +#include <net/gre.h> + #define PPP_LCP_ECHOREQ 0x09 #define PPP_LCP_ECHOREP 0x0A #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) diff --git a/include/net/protocol.h b/include/net/protocol.h index f51c06ae365f..6aef8cb11cc8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,8 +35,6 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); - int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ @@ -52,8 +50,6 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. 
*/ diff --git a/include/net/psnap.h b/include/net/psnap.h index 7cb0c8ab4171..88802b0754ad 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -2,6 +2,11 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H +struct datalink_proto; +struct sk_buff; +struct packet_type; +struct net_device; + struct datalink_proto * register_snap_client(const unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..5e665934ebc7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -20,9 +20,8 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; -struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, - __be32 laddr, int dif, int sdif); +bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); @@ -33,10 +32,19 @@ int raw_rcv(struct sock *, struct sk_buff *); #define RAW_HTABLE_SIZE MAX_INET_PROTOS struct raw_hashinfo { - rwlock_t lock; - struct hlist_head ht[RAW_HTABLE_SIZE]; + spinlock_t lock; + struct hlist_nulls_head ht[RAW_HTABLE_SIZE]; }; +static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) +{ + int i; + + spin_lock_init(&hashinfo->lock); + for (i = 0; i < RAW_HTABLE_SIZE; i++) + INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); +} + #ifdef CONFIG_PROC_FS int raw_proc_init(void); void raw_proc_exit(void); @@ -75,7 +83,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 
53d86b6055e8..bc70909625f6 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,11 +3,12 @@ #define _NET_RAWV6_H #include <net/protocol.h> +#include <net/raw.h> extern struct raw_hashinfo raw_v6_hashinfo; -struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif, int sdif); +bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); diff --git a/include/net/red.h b/include/net/red.h index be11dbd26492..425364de0df7 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -122,7 +122,6 @@ struct red_stats { u32 forced_drop; /* Forced drops, qavg > max_thresh */ u32 forced_mark; /* Forced marks, qavg > max_thresh */ u32 pdrop; /* Drops due to queue limits */ - u32 other; /* Drops due to drop() calls */ }; struct red_parms { @@ -364,7 +363,7 @@ static inline unsigned long red_calc_qavg(const struct red_parms *p, static inline u32 red_random(const struct red_parms *p) { - return reciprocal_divide(prandom_u32(), p->max_P_reciprocal); + return reciprocal_divide(get_random_u32(), p->max_P_reciprocal); } static inline int red_mark_probability(const struct red_parms *p, diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67..896191f420d5 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -1,3 +1,4 @@ + #ifndef __NET_REGULATORY_H #define __NET_REGULATORY_H /* @@ -19,6 +20,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include <linux/ieee80211.h> +#include <linux/nl80211.h> #include <linux/rcupdate.h> /** diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 29e41ff3ec93..144c39db9898 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -70,6 +70,7 @@ struct request_sock { struct saved_syn *saved_syn; u32 secid; u32 peer_secid; + u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) @@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; + req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; diff --git a/include/net/rose.h b/include/net/rose.h index 0f0a4ce0fee7..23267b4efcfa 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -9,6 +9,7 @@ #define _ROSE_H #include <linux/rose.h> +#include <net/ax25.h> #include <net/sock.h> #define ROSE_ADDR_LEN 5 @@ -131,7 +132,8 @@ struct rose_sock { ax25_address source_digis[ROSE_MAX_DIGIS]; ax25_address dest_digis[ROSE_MAX_DIGIS]; struct rose_neigh *neighbour; - struct net_device *device; + struct net_device *device; + netdevice_tracker dev_tracker; unsigned int lci, rand; unsigned char state, condition, qbitincl, defer; unsigned char cause, diagnostic; diff --git a/include/net/route.h b/include/net/route.h index 2e6c0e153e3a..6e92dd5bcd61 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,20 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +static inline __u8 ip_sock_rt_scope(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_LOCALROUTE)) + return RT_SCOPE_LINK; + + return RT_SCOPE_UNIVERSE; +} + +static inline __u8 ip_sock_rt_tos(const struct sock *sk) +{ + return RT_TOS(inet_sk(sk)->tos); +} + +struct ip_tunnel_info; struct fib_nh; struct fib_info; struct 
uncached_list; @@ -187,10 +201,6 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); -int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, - struct fib_result *res); - int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, const struct sk_buff *hint); @@ -230,8 +240,7 @@ void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, - unsigned int flags, u16 type, - bool nopolicy, bool noxfrm); + unsigned int flags, u16 type, bool noxfrm); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; @@ -288,39 +297,38 @@ static inline char rt_tos2priority(u8 tos) * ip_route_newports() calls. 
*/ -static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, - u32 tos, int oif, u8 protocol, +static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk) + const struct sock *sk) { __u8 flow_flags = 0; if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport, - sk->sk_uid); + flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + ip_sock_rt_scope(sk), protocol, flow_flags, dst, + src, dport, sport, sk->sk_uid); } -static inline struct rtable *ip_route_connect(struct flowi4 *fl4, - __be32 dst, __be32 src, u32 tos, - int oif, u8 protocol, +static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; - ip_route_connect_init(fl4, dst, src, tos, oif, protocol, - sport, dport, sk); + ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); - flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); + flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr, + fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -360,7 +368,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = net->ipv4.sysctl_ip_default_ttl; + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } @@ -369,7 +377,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); 
if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 9f48733bfd21..bf8bb3357825 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { - RTNL_FLAG_DOIT_UNLOCKED = 1, + RTNL_FLAG_DOIT_UNLOCKED = BIT(0), + RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), }; +enum rtnl_kinds { + RTNL_KIND_NEW, + RTNL_KIND_DEL, + RTNL_KIND_GET, + RTNL_KIND_SET +}; +#define RTNL_KIND_MASK 0x3 + +static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) +{ + return msgtype & RTNL_KIND_MASK; +} + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72..d5517719af4e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -125,7 +125,7 @@ struct Qdisc { spinlock_t seqlock; struct rcu_head rcu; - + netdevice_tracker dev_tracker; /* private data */ long privdata[] ____cacheline_aligned; }; @@ -187,37 +187,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) return true; - /* Paired with smp_mb__after_atomic() to make sure - * STATE_MISSED checking is synchronized with clearing - * in pfifo_fast_dequeue(). - */ - smp_mb__before_atomic(); - - /* If the MISSED flag is set, it means other thread has - * set the MISSED flag before second spin_trylock(), so - * we can return false here to avoid multi cpus doing - * the set_bit() and second spin_trylock() concurrently. + /* No need to insist if the MISSED flag was already set. 
+ * Note that test_and_set_bit() also gives us memory ordering + * guarantees wrt potential earlier enqueue() and below + * spin_trylock(), both of which are necessary to prevent races */ - if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; - /* Set the MISSED flag before the second spin_trylock(), - * if the second spin_trylock() return false, it means - * other cpu holding the lock will do dequeuing for us - * or it will see the MISSED flag set after releasing - * lock and reschedule the net_tx_action() to do the - * dequeuing. - */ - set_bit(__QDISC_STATE_MISSED, &qdisc->state); - - /* spin_trylock() only has load-acquire semantic, so use - * smp_mb__after_atomic() to ensure STATE_MISSED is set - * before doing the second spin_trylock(). - */ - smp_mb__after_atomic(); - - /* Retry again in case other CPU may not see the new flag - * after it releases the lock at the end of qdisc_run_end(). + /* Try to take the lock again to make sure that we will either + * grab it or the CPU that still has it will see MISSED set + * when testing it in qdisc_run_end() */ return spin_trylock(&qdisc->seqlock); } @@ -229,6 +209,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + /* spin_unlock() only has store-release semantic. The unlock + * and test_bit() ordering is a store-load ordering, so a full + * memory barrier is needed here. 
+ */ + smp_mb(); + if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); @@ -340,11 +326,6 @@ struct tcf_result { }; const struct tcf_proto *goto_tp; - /* used in the skb_tc_reinsert function */ - struct { - bool ingress; - struct gnet_stats_queue *qstats; - }; }; }; @@ -447,8 +428,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -520,11 +499,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } -static inline int qdisc_qlen_cpu(const struct Qdisc *q) -{ - return this_cpu_ptr(q->cpu_qstats)->qlen; -} - static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; @@ -572,25 +546,6 @@ static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) return qdisc->dev_queue->qdisc_sleeping; } -/* The qdisc root lock is a mechanism by which to top level - * of a qdisc tree can be locked from any qdisc node in the - * forest. This allows changing the configuration of some - * aspect of the qdisc tree while blocking out asynchronous - * qdisc access in the packet processing paths. - * - * It is only legal to do this when the root will not change - * on us. Otherwise we'll potentially lock the wrong qdisc - * root. This is enforced by holding the RTNL semaphore, which - * all users of this lock accessor must do. 
- */ -static inline spinlock_t *qdisc_root_lock(const struct Qdisc *qdisc) -{ - struct Qdisc *root = qdisc_root(qdisc); - - ASSERT_RTNL(); - return qdisc_lock(root); -} - static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root_sleeping(qdisc); @@ -722,6 +677,9 @@ qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, { } #endif +void qdisc_offload_query_caps(struct net_device *dev, + enum tc_setup_type type, + void *caps, size_t caps_len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); @@ -980,13 +938,6 @@ static inline void qdisc_purge_queue(struct Qdisc *sch) qdisc_tree_reduce_backlog(sch, qlen, backlog); } -static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) -{ - qh->head = NULL; - qh->tail = NULL; - qh->qlen = 0; -} - static inline void __qdisc_enqueue_tail(struct sk_buff *skb, struct qdisc_skb_head *qh) { @@ -1177,7 +1128,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) static inline void qdisc_reset_queue(struct Qdisc *sch) { __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; } static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, @@ -1246,6 +1196,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1255,6 +1206,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1277,6 +1231,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 
189fdb9db162..a04999ee99b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,21 +103,20 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); -struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); -int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), - struct net *net, +int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); @@ -510,8 +509,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -#define sctp_for_each_hentry(epb, head) \ - hlist_for_each_entry(epb, head, node) +#define sctp_for_each_hentry(ep, head) \ + hlist_for_each_entry(ep, head, node) /* Is a socket of this style? 
*/ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..350f250b0dc7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,12 +984,10 @@ struct sctp_transport { } cacc; struct { - __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; - __u8 probe_count:3; - __u8 raise_count:5; + __u8 probe_count; __u8 state; } pl; /* plpmtud related */ @@ -1011,6 +1009,7 @@ void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); void sctp_transport_reset_probe_timer(struct sctp_transport *transport); +void sctp_transport_reset_raise_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1025,7 +1024,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -bool sctp_transport_pl_send(struct sctp_transport *t); +void sctp_transport_pl_send(struct sctp_transport *t); bool sctp_transport_pl_recv(struct sctp_transport *t); @@ -1244,10 +1243,6 @@ enum sctp_endpoint_type { */ struct sctp_ep_common { - /* Fields to help us manage our entries in the hash tables. */ - struct hlist_node node; - int hashent; - /* Runtime type information. What kind of endpoint is this? */ enum sctp_endpoint_type type; @@ -1299,6 +1294,10 @@ struct sctp_endpoint { /* Common substructure for endpoint and association. */ struct sctp_ep_common base; + /* Fields to help us manage our entries in the hash tables. 
*/ + struct hlist_node node; + int hashent; + /* Associations: A list of current associations and mappings * to the data consumers for each association. This * may be in the form of a hash table or other @@ -1355,6 +1354,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1370,7 +1370,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c2..21e7fa2a1813 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,10 @@ #include <linux/types.h> -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +struct net; + +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/include/net/seg6.h b/include/net/seg6.h index 9d19c15e8545..af668f17b398 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,9 +58,30 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); +extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); +extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); extern int seg6_do_srh_encap(struct sk_buff 
*skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); + +/* If the packet which invoked an ICMP error contains an SRH return + * the true destination address from within the SRH, otherwise use the + * destination address in the IP header. + */ +static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, + struct inet6_skb_parm *opt) +{ + struct ipv6_sr_hdr *srh; + + if (opt->flags & IP6SKB_SEG6) { + srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); + return &srh->segments[0]; + } + + return NULL; +} + + #endif diff --git a/include/net/smc.h b/include/net/smc.h index e441aa97ad61..c926d3313e05 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -11,6 +11,13 @@ #ifndef _SMC_H #define _SMC_H +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/wait.h> + +struct sock; + #define SMC_MAX_PNETID_LEN 16 /* Max. 
length of PNET id */ struct smc_hashinfo { @@ -65,7 +72,7 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); - void (*get_system_eid)(struct smcd_dev *dev, u8 **eid); + u8* (*get_system_eid)(void); u16 (*get_chid)(struct smcd_dev *dev); }; @@ -94,5 +101,5 @@ int smcd_register_dev(struct smcd_dev *smcd); void smcd_unregister_dev(struct smcd_dev *smcd); void smcd_free_dev(struct smcd_dev *smcd); void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event); -void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit); +void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask); #endif /* _SMC_H */ diff --git a/include/net/sock.h b/include/net/sock.h index bea21ff70e74..5db02546941c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -56,13 +56,13 @@ #include <linux/wait.h> #include <linux/cgroup-defs.h> #include <linux/rbtree.h> -#include <linux/filter.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> #include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> +#include <linux/llist.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned - * address on 64bit arches : cf INET_MATCH() - */ union { __addrpair skc_addrpair; struct { @@ -248,6 +245,7 @@ struct sock_common { }; struct bpf_local_storage; +struct sk_filter; /** * struct sock - network layer representation of sockets @@ -284,9 +282,7 @@ struct bpf_local_storage; * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. 
%NETIF_F_TSO) - * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) - * @sk_route_forced_caps: static, forced route capabilities - * (set in tcp_init_sock()) + * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden. * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -316,6 +312,7 @@ struct bpf_local_storage; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_txhash: computed flow hash for use on transmit + * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received @@ -350,6 +347,8 @@ struct bpf_local_storage; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @ns_tracker: tracker for netns reference + * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -391,6 +390,11 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + socket_lock_t sk_lock; atomic_t sk_drops; int sk_rcvlowat; @@ -410,6 +414,7 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; + #define sk_rmem_alloc sk_backlog.rmem_alloc int sk_forward_alloc; @@ -431,9 +436,6 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; @@ -460,8 +462,6 @@ struct sock { unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - netdev_features_t sk_route_forced_caps; int sk_gso_type; unsigned int sk_gso_max_size; 
gfp_t sk_allocation; @@ -471,7 +471,7 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ - u8 sk_padding : 1, + u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, @@ -488,11 +488,13 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + u8 sk_txrehash; #ifdef CONFIG_NET_RX_BUSY_POLL u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; #endif spinlock_t sk_peer_lock; + int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; @@ -502,9 +504,8 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; - int sk_bind_phc; u8 sk_shutdown; - u32 sk_tskey; + atomic_t sk_tskey; atomic_t sk_zckey; u8 sk_clockid; @@ -536,6 +537,8 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; + netns_tracker ns_tracker; + struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -544,14 +547,26 @@ enum sk_pacing { SK_PACING_FQ = 2, }; -/* Pointer stored in sk_user_data might not be suitable for copying - * when cloning the socket. For instance, it can point to a reference - * counted object. sk_user_data bottom bit is set if pointer must not - * be copied. +/* flag bits in sk_user_data + * + * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might + * not be suitable for copying when cloning the socket. For instance, + * it can point to a reference counted object. sk_user_data bottom + * bit is set if pointer must not be copied. + * + * - SK_USER_DATA_BPF: Mark whether sk_user_data field is + * managed/owned by a BPF reuseport array. This bit should be set + * when sk_user_data's sk is added to the bpf's reuseport_array. + * + * - SK_USER_DATA_PSOCK: Mark whether pointer stored in + * sk_user_data points to psock type. This bit should be set + * when sk_user_data is assigned to a psock object. 
*/ #define SK_USER_DATA_NOCOPY 1UL -#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ -#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) +#define SK_USER_DATA_BPF 2UL +#define SK_USER_DATA_PSOCK 4UL +#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ + SK_USER_DATA_PSOCK) /** * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied @@ -564,24 +579,77 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) +/** + * __locked_read_sk_user_data_with_flags - return the pointer + * only if argument flags all has been set in sk_user_data. Otherwise + * return NULL + * + * @sk: socket + * @flags: flag bits + * + * The caller must be holding sk->sk_callback_lock. + */ +static inline void * +__locked_read_sk_user_data_with_flags(const struct sock *sk, + uintptr_t flags) +{ + uintptr_t sk_user_data = + (uintptr_t)rcu_dereference_check(__sk_user_data(sk), + lockdep_is_held(&sk->sk_callback_lock)); + + WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); + + if ((sk_user_data & flags) == flags) + return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); + return NULL; +} + +/** + * __rcu_dereference_sk_user_data_with_flags - return the pointer + * only if argument flags all has been set in sk_user_data. 
Otherwise + * return NULL + * + * @sk: socket + * @flags: flag bits + */ +static inline void * +__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, + uintptr_t flags) +{ + uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); + + WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); + + if ((sk_user_data & flags) == flags) + return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); + return NULL; +} + #define rcu_dereference_sk_user_data(sk) \ + __rcu_dereference_sk_user_data_with_flags(sk, 0) +#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ ({ \ - void *__tmp = rcu_dereference(__sk_user_data((sk))); \ - (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \ -}) -#define rcu_assign_sk_user_data(sk, ptr) \ -({ \ - uintptr_t __tmp = (uintptr_t)(ptr); \ - WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ - rcu_assign_pointer(__sk_user_data((sk)), __tmp); \ -}) -#define rcu_assign_sk_user_data_nocopy(sk, ptr) \ -({ \ - uintptr_t __tmp = (uintptr_t)(ptr); \ - WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ + uintptr_t __tmp1 = (uintptr_t)(ptr), \ + __tmp2 = (uintptr_t)(flags); \ + WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ + WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ rcu_assign_pointer(__sk_user_data((sk)), \ - __tmp | SK_USER_DATA_NOCOPY); \ + __tmp1 | __tmp2); \ }) +#define rcu_assign_sk_user_data(sk, ptr) \ + __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) + +static inline +struct net *sock_net(const struct sock *sk) +{ + return read_pnet(&sk->sk_net); +} + +static inline +void sock_net_set(struct sock *sk, struct net *net) +{ + write_pnet(&sk->sk_net, net); +} /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK @@ -596,7 +664,7 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) int sk_set_peek_off(struct sock *sk, int val); -static inline int sk_peek_offset(struct sock *sk, int flags) +static inline int sk_peek_offset(const struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { 
return READ_ONCE(sk->sk_peek_off); @@ -676,11 +744,6 @@ static inline void sk_node_init(struct hlist_node *node) node->pprev = NULL; } -static inline void sk_nulls_node_init(struct hlist_nulls_node *node) -{ - node->pprev = NULL; -} - static inline void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); @@ -804,6 +867,16 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } +static inline void __sk_del_bind2_node(struct sock *sk) +{ + __hlist_del(&sk->sk_bind2_node); +} + +static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head(&sk->sk_bind2_node, list); +} + #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -821,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_bhash2(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset @@ -836,7 +911,7 @@ static inline void sk_add_bind_node(struct sock *sk, ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ pos = rcu_dereference(hlist_next_rcu(pos))) -static inline struct user_namespace *sk_user_ns(struct sock *sk) +static inline struct user_namespace *sk_user_ns(const struct sock *sk) { /* Careful only use this in a context where these parameters * can not change and must all be valid, such as recvmsg from @@ -877,11 +952,12 @@ enum sock_flags { SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ + SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) -static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) +static inline 
void sock_copy_flags(struct sock *nsk, const struct sock *osk) { nsk->sk_flags = osk->sk_flags; } @@ -1022,12 +1098,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { if (sk_memalloc_socks() && skb_pfmemalloc(skb)) return __sk_backlog_rcv(sk, skb); - return sk->sk_backlog_rcv(sk, skb); + return INDIRECT_CALL_INET(sk->sk_backlog_rcv, + tcp_v6_do_rcv, + tcp_v4_do_rcv, + sk, skb); } static inline void sk_incoming_cpu_update(struct sock *sk) @@ -1178,8 +1260,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len); + size_t len, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, @@ -1199,6 +1280,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*put_port)(struct sock *sk); #ifdef CONFIG_BPF_SYSCALL int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, @@ -1210,7 +1292,9 @@ struct proto { unsigned int inuse_idx; #endif +#if IS_ENABLED(CONFIG_MPTCP) int (*forward_alloc_get)(const struct sock *sk); +#endif bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); @@ -1218,6 +1302,7 @@ struct proto { void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. 
*/ + int __percpu *per_cpu_fw_alloc; struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1299,10 +1384,11 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int static inline int sk_forward_alloc_get(const struct sock *sk) { - if (!sk->sk_prot->forward_alloc_get) - return sk->sk_forward_alloc; - - return sk->sk_prot->forward_alloc_get(sk); +#if IS_ENABLED(CONFIG_MPTCP) + if (sk->sk_prot->forward_alloc_get) + return sk->sk_prot->forward_alloc_get(sk); +#endif + return sk->sk_forward_alloc; } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) @@ -1360,21 +1446,46 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) } static inline long -sk_memory_allocated(const struct sock *sk) +proto_memory_allocated(const struct proto *prot) { - return atomic_long_read(sk->sk_prot->memory_allocated); + return max(0L, atomic_long_read(prot->memory_allocated)); } static inline long +sk_memory_allocated(const struct sock *sk) +{ + return proto_memory_allocated(sk->sk_prot); +} + +/* 1 MB per cpu, in page units */ +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) + +static inline void sk_memory_allocated_add(struct sock *sk, int amt) { - return atomic_long_add_return(amt, sk->sk_prot->memory_allocated); + int local_reserve; + + preempt_disable(); + local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); + if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); + atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); + } + preempt_enable(); } static inline void sk_memory_allocated_sub(struct sock *sk, int amt) { - atomic_long_sub(amt, sk->sk_prot->memory_allocated); + int local_reserve; + + preempt_disable(); + local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); + if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, 
local_reserve); + atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); + } + preempt_enable(); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 @@ -1403,12 +1514,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot) return percpu_counter_sum_positive(prot->sockets_allocated); } -static inline long -proto_memory_allocated(struct proto *prot) -{ - return atomic_long_read(prot->memory_allocated); -} - static inline bool proto_memory_pressure(struct proto *prot) { @@ -1419,13 +1524,32 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS -/* Called with local bh disabled */ -void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int all; + int val[PROTO_INUSE_NR]; +}; + +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ + this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); +} + +static inline void sock_inuse_add(const struct net *net, int val) +{ + this_cpu_add(net->core.prot_inuse->all, val); +} + int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else -static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, - int inc) +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ +} + +static inline void sock_inuse_add(const struct net *net, int val) { } #endif @@ -1476,30 +1600,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind); void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -/* We used to have PAGE_SIZE here, but systems with 64KB pages - * do not necessarily have 16x time more memory than 4KB ones. 
- */ -#define SK_MEM_QUANTUM 4096 -#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 -/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +/* sysctl_mem values are in pages */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; - -#if PAGE_SIZE > SK_MEM_QUANTUM - val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; -#elif PAGE_SIZE < SK_MEM_QUANTUM - val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; -#endif - return val; + return READ_ONCE(sk->sk_prot->sysctl_mem[index]); } static inline int sk_mem_pages(int amt) { - return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; + return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT; } static inline bool sk_has_account(struct sock *sk) @@ -1510,19 +1622,23 @@ static inline bool sk_has_account(struct sock *sk) static inline bool sk_wmem_schedule(struct sock *sk, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_SEND); + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); } static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_RECV) || + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || skb_pfmemalloc(skb); } @@ -1548,7 +1664,7 @@ static inline void sk_mem_reclaim(struct sock *sk) reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - if (reclaimable >= SK_MEM_QUANTUM) + if (reclaimable >= (int)PAGE_SIZE) __sk_mem_reclaim(sk, reclaimable); } @@ -1558,19 +1674,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk) sk_mem_reclaim(sk); } -static inline void sk_mem_reclaim_partial(struct sock *sk) -{ - int reclaimable; - - if 
(!sk_has_account(sk)) - return; - - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - if (reclaimable > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, reclaimable - 1); -} - static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) @@ -1578,39 +1681,12 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } -/* the following macros control memory reclaiming in sk_mem_uncharge() - */ -#define SK_RECLAIM_THRESHOLD (1 << 21) -#define SK_RECLAIM_CHUNK (1 << 20) - static inline void sk_mem_uncharge(struct sock *sk, int size) { - int reclaimable; - if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - /* Avoid a possible overflow. - * TCP send queues can make this happen, if sk_mem_reclaim() - * is not called and more than 2 GBytes are released at once. - * - * If we reach 2 MBytes, reclaim 1 MBytes right now, there is - * no need to hold that much forward allocation anyway. - */ - if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) - __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); -} - -static inline void sock_release_ownership(struct sock *sk) -{ - if (sk->sk_lock.owned) { - sk->sk_lock.owned = 0; - - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + sk_mem_reclaim(sk); } /* @@ -1707,6 +1783,11 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) } } +void sockopt_lock_sock(struct sock *sk); +void sockopt_release_sock(struct sock *sk); +bool sockopt_ns_capable(struct user_namespace *ns, int cap); +bool sockopt_capable(int cap); + /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it * from under us. 
It essentially blocks any incoming @@ -1739,12 +1820,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) return sk->sk_lock.owned; } +static inline void sock_release_ownership(struct sock *sk) +{ + if (sock_owned_by_user_nocheck(sk)) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); + } +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { struct sock *sk = (struct sock *)csk; - return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock); + return !sock_owned_by_user_nocheck(sk) && + !spin_is_locked(&sk->sk_lock.slock); } struct sock *sk_alloc(struct net *net, int family, gfp_t priority, @@ -1770,23 +1862,40 @@ void sock_pfree(struct sk_buff *skb); #define sock_edemux sock_efree #endif +int sk_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); +int sk_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen); int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order); + +static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk, + unsigned long size, + int noblock, int *errcode) +{ + return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); +} + void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); void 
sk_send_sigurg(struct sock *sk); +static inline void sock_replace_proto(struct sock *sk, struct proto *proto) +{ + if (sk->sk_socket) + clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + WRITE_ONCE(sk->sk_prot, proto); +} + struct sockcm_cookie { u64 transmit_time; u32 mark; @@ -2007,7 +2116,7 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) static inline u32 net_tx_rndhash(void) { - u32 v = prandom_u32(); + u32 v = get_random_u32(); return v ?: 1; } @@ -2020,7 +2129,7 @@ static inline void sk_set_txhash(struct sock *sk) static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) { + if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) { sk_set_txhash(sk); return true; } @@ -2118,13 +2227,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) { if (skb_get_dst_pending_confirm(skb)) { struct sock *sk = skb->sk; - unsigned long now = jiffies; - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 0); + neigh_confirm(n); } } @@ -2137,10 +2243,10 @@ static inline bool sk_can_gso(const struct sock *sk) void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) +static inline void sk_gso_disable(struct sock *sk) { - sk->sk_route_nocaps |= flags; - sk->sk_route_caps &= ~flags; + sk->sk_gso_disabled = 1; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, @@ -2186,9 +2292,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro if (err) return err; - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; @@ -2347,7 +2451,14 @@ int __sk_queue_drop_skb(struct sock *sk, 
struct sk_buff_head *sk_queue, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); + +static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return sock_queue_rcv_skb_reason(sk, skb, NULL); +} int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); struct sk_buff *sock_dequeue_err_skb(struct sock *sk); @@ -2481,7 +2592,7 @@ static inline gfp_t gfp_any(void) static inline gfp_t gfp_memcg_charge(void) { - return in_softirq() ? GFP_NOWAIT : GFP_KERNEL; + return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) @@ -2598,20 +2709,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) -static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { -#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ - (1UL << SOCK_RCVTSTAMP)) +#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ + (1UL << SOCK_RCVTSTAMP) | \ + (1UL << SOCK_RCVMARK)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) - __sock_recv_ts_and_drops(msg, sk, skb); + if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); else 
if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) @@ -2636,7 +2748,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = sk->sk_tskey++; + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; @@ -2654,6 +2766,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2668,18 +2785,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static inline -struct net *sock_net(const struct sock *sk) -{ - return read_pnet(&sk->sk_net); -} - -static inline -void sock_net_set(struct sock *sk, struct net *net) -{ - write_pnet(&sk->sk_net, net); -} - static inline bool skb_sk_is_prefetched(struct sk_buff *skb) { @@ -2796,18 +2901,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ if (proto->sysctl_wmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); - return *proto->sysctl_wmem; + return READ_ONCE(*proto->sysctl_wmem); } static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_rmem ? 
*/ if (proto->sysctl_rmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); - return *proto->sysctl_rmem; + return READ_ONCE(*proto->sysctl_rmem); } /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) @@ -2828,13 +2933,14 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val) */ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); int mdif; - if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + if (!bound_dev_if || bound_dev_if == dif) return true; mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); - if (mdif && mdif == sk->sk_bound_dev_if) + if (mdif && mdif == bound_dev_if) return true; return false; diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 473b0b0fa4ab..efc9085c6892 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk, extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); -static inline bool reuseport_has_conns(struct sock *sk, bool set) +static inline bool reuseport_has_conns(struct sock *sk) { struct sock_reuseport *reuse; bool ret = false; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); - if (reuse) { - if (set) - reuse->has_conns = 1; - ret = reuse->has_conns; - } + if (reuse && reuse->has_conns) + ret = true; rcu_read_unlock(); return ret; } +void reuseport_has_conns_set(struct sock *sk); + #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/stp.h b/include/net/stp.h index 2914e6d53490..528103fce2c0 100644 --- a/include/net/stp.h +++ b/include/net/stp.h @@ -2,6 +2,8 @@ #ifndef _NET_STP_H #define _NET_STP_H +#include <linux/if_ether.h> + struct stp_proto { unsigned char group_address[ETH_ALEN]; void (*rcv)(const struct 
stp_proto *, struct sk_buff *, diff --git a/include/net/strparser.h b/include/net/strparser.h index 732b7097d78e..41e2ce9e9e10 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -65,7 +65,14 @@ struct _strp_msg { struct sk_skb_cb { #define SK_SKB_CB_PRIV_LEN 20 unsigned char data[SK_SKB_CB_PRIV_LEN]; + /* align strp on cache line boundary within skb->cb[] */ + unsigned char pad[4]; struct _strp_msg strp; + + /* strp users' data follows */ + struct tls_msg { + u8 control; + } tls; /* temp_reg is a temporary register used for bpf_convert_data_end_access * when dst_reg == src_reg. */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d353793dfeb5..7dcdc97c0bc3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,6 +19,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, + SWITCHDEV_ATTR_ID_PORT_MST_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, @@ -27,7 +28,14 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + SWITCHDEV_ATTR_ID_BRIDGE_MST, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + SWITCHDEV_ATTR_ID_VLAN_MSTI, +}; + +struct switchdev_mst_state { + u16 msti; + u8 state; }; struct switchdev_brport_flags { @@ -35,6 +43,11 @@ struct switchdev_brport_flags { unsigned long mask; }; +struct switchdev_vlan_msti { + u16 vid; + u16 msti; +}; + struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; @@ -43,13 +56,16 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ + struct switchdev_mst_state mst_state; /* PORT_MST_STATE */ struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ u16 
vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ + bool mst; /* BRIDGE_MST */ bool mc_disabled; /* MC_DISABLED */ u8 mrp_port_role; /* MRP_PORT_ROLE */ + struct switchdev_vlan_msti vlan_msti; /* VLAN_MSTI */ } u; }; @@ -81,6 +97,13 @@ struct switchdev_obj_port_vlan { struct switchdev_obj obj; u16 flags; u16 vid; + /* If set, the notifier signifies a change of one of the following + * flags for a VLAN that already exists: + * - BRIDGE_VLAN_INFO_PVID + * - BRIDGE_VLAN_INFO_UNTAGGED + * Entries with BRIDGE_VLAN_INFO_BRENTRY unset are not notified at all. + */ + bool changed; }; #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ @@ -216,6 +239,9 @@ struct switchdev_notifier_info { const void *ctx; }; +/* Remember to update br_switchdev_fdb_populate() when adding + * new members to this structure + */ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; @@ -306,10 +332,7 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)); + const struct switchdev_notifier_fdb_info *fdb_info)); int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -317,11 +340,26 @@ int switchdev_handle_port_obj_add(struct net_device *dev, int (*add_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack)); +int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const 
struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)); int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), int (*del_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj)); +int switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)); int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, @@ -421,10 +459,7 @@ switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event const struct net_device *foreign_dev), int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info), - int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)) + const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; } @@ -440,6 +475,18 @@ switchdev_handle_port_obj_add(struct net_device *dev, return 0; } +static inline int switchdev_handle_port_obj_add_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack)) +{ + return 0; +} + static inline int 
switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -451,6 +498,18 @@ switchdev_handle_port_obj_del(struct net_device *dev, } static inline int +switchdev_handle_port_obj_del_foreign(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj)) +{ + return 0; +} + +static inline int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index eb8f01c819e6..832efd40e023 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } +static inline bool is_tcf_gact_continue(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false); +} + +static inline bool is_tcf_gact_reclassify(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false); +} + +static inline bool is_tcf_gact_pipe(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_PIPE, false); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 8bc6be81a7ad..c8fa11ebb397 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a) return false; } -static inline u32 tcf_gate_index(const struct tc_action *a) -{ - return a->tcfa_index; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; diff --git 
a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 1cace4c69e44..32ce8ea36950 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_eaction; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + netdevice_tracker tcfm_dev_tracker; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7e..3e02709a1df6 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 72649512dcdd..283bde711a42 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -159,4 +159,34 @@ static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) return params->tcfp_mtu; } +static inline u64 tcf_police_peakrate_bytes_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->peak.rate_bytes_ps; +} + +static inline u32 tcf_police_tcfp_ewma_rate(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->tcfp_ewma_rate; +} + +static inline u16 tcf_police_rate_overhead(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->rate.overhead; +} + 
#endif /* __NET_TC_POLICE_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 00bfee70609e..dc1079f28e13 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -17,6 +17,7 @@ struct tcf_skbedit_params { u32 mark; u32 mask; u16 queue_mapping; + u16 mapping_mod; u16 ptype; struct rcu_head rcu; }; @@ -94,4 +95,16 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +/* Return true iff action is queue_mapping */ +static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); +} + +/* Return true iff action is inheritdsfield */ +static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD); +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index f94b8bc26f9e..904eddfc1826 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -78,4 +78,14 @@ static inline u8 tcf_vlan_push_prio(const struct tc_action *a) return tcfv_push_prio; } + +static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest, + const struct tc_action *a) +{ + rcu_read_lock(); + memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN); + memcpy(src, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); + rcu_read_unlock(); +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4da22b41bde6..14d45661a84d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -253,6 +253,8 @@ extern long sysctl_tcp_mem[3]; #define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ extern atomic_long_t tcp_memory_allocated; +DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); + extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; @@ -325,6 +327,8 
@@ void tcp_remove_empty_skb(struct sock *sk); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, + size_t size, struct ubuf_info *uarg); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, @@ -344,6 +348,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); +void tcp_twsk_purge(struct list_head *net_exit_list, int family); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -400,14 +405,18 @@ void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb); __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +int do_tcp_getsockopt(struct sock *sk, int level, + int optname, sockptr_t optval, sockptr_t optlen); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); bool tcp_bpf_bypass_getsockopt(int level, int optname); +int do_tcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); int 
tcp_set_window_clamp(struct sock *sk, int val); @@ -432,6 +441,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, struct tcphdr *th, u32 *cookie); u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie); +u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss); u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct tcphdr *th); @@ -480,6 +490,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES @@ -620,6 +631,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); +void tcp_check_space(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); @@ -667,6 +679,9 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); +void tcp_read_done(struct sock *sk, size_t len); void tcp_initialize_rcv_mss(struct sock *sk); @@ -930,7 +945,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); +void tcp_v6_early_demux(struct sk_buff *skb); #endif @@ -1042,6 
+1057,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ @@ -1139,15 +1155,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } -static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ca_ops->set_state) - icsk->icsk_ca_ops->set_state(sk, ca_state); - icsk->icsk_ca_state = ca_state; -} - static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1156,6 +1163,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_cong.c */ +void tcp_set_ca_state(struct sock *sk, const u8 ca_state); + /* From tcp_rate.c */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, @@ -1164,6 +1174,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); +static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) +{ + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. 
@@ -1207,9 +1222,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) +{ + return tp->snd_cwnd; +} + +static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) +{ + WARN_ON_ONCE((int)val <= 0); + tp->snd_cwnd = val; +} + static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { - return tp->snd_cwnd < tp->snd_ssthresh; + return tcp_snd_cwnd(tp) < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) @@ -1235,8 +1261,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, - ((tp->snd_cwnd >> 1) + - (tp->snd_cwnd >> 2))); + ((tcp_snd_cwnd(tp) >> 1) + + (tcp_snd_cwnd(tp) >> 2))); } /* Use define here intentionally to get WARN_ON location shown at the caller */ @@ -1276,11 +1302,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + if (tp->is_cwnd_limited) + return true; + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) - return tp->snd_cwnd < 2 * tp->max_packets_out; + return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; - return tp->is_cwnd_limited; + return false; } /* BBR congestion control needs pacing. 
@@ -1367,7 +1396,10 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); + + int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); @@ -1387,8 +1419,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1403,7 +1435,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : @@ -1477,21 +1509,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? 
: net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -1504,7 +1539,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -1660,6 +1696,12 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return __tcp_md5_do_lookup(sk, l3index, addr, family); } +enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); + + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1668,6 +1710,14 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, { return NULL; } + +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + return SKB_NOT_DROPPED_YET; +} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -1803,11 +1853,6 @@ static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) return skb_rb_last(&sk->tcp_rtx_queue); } -static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); @@ -1998,7 +2043,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: 
net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); @@ -2172,9 +2217,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) u16 segs_in; segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - tp->segs_in += segs_in; + + /* We update these fields while other threads might + * read them from tcp_get_info() + */ + WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in); if (skb->len > tcp_hdrlen(skb)) - tp->data_segs_in += segs_in; + WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in); } /* @@ -2340,7 +2389,7 @@ static inline u32 tcp_timeout_init(struct sock *sk) if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; - return timeout; + return min_t(int, timeout, TCP_RTO_MAX); } static inline u32 tcp_rwnd_init_bpf(struct sock *sk) diff --git a/include/net/tls.h b/include/net/tls.h index 526cb2c3b724..154949c7b0c8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -39,7 +39,6 @@ #include <linux/crypto.h> #include <linux/socket.h> #include <linux/tcp.h> -#include <linux/skmsg.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/rcupdate.h> @@ -50,6 +49,17 @@ #include <crypto/aead.h> #include <uapi/linux/tls.h> +struct tls_rec; + +struct tls_cipher_size_desc { + unsigned int iv; + unsigned int key; + unsigned int salt; + unsigned int tag; + unsigned int rec_seq; +}; + +extern const struct tls_cipher_size_desc tls_cipher_size_desc[]; /* Maximum data size carried in a TLS record */ #define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) @@ -64,7 +74,9 @@ #define TLS_AAD_SPACE_SIZE 13 #define MAX_IV_SIZE 16 +#define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 +#define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE /* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes. 
* @@ -76,13 +88,6 @@ #define TLS_AES_CCM_IV_B0_BYTE 2 #define TLS_SM4_CCM_IV_B0_BYTE 2 -#define __TLS_INC_STATS(net, field) \ - __SNMP_INC_STATS((net)->mib.tls_statistics, field) -#define TLS_INC_STATS(net, field) \ - SNMP_INC_STATS((net)->mib.tls_statistics, field) -#define TLS_DEC_STATS(net, field) \ - SNMP_DEC_STATS((net)->mib.tls_statistics, field) - enum { TLS_BASE, TLS_SW, @@ -91,37 +96,6 @@ enum { TLS_NUM_CONFIG, }; -/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages - * allocated or mapped for each TLS record. After encryption, the records are - * stores in a linked list. - */ -struct tls_rec { - struct list_head list; - int tx_ready; - int tx_flags; - - struct sk_msg msg_plaintext; - struct sk_msg msg_encrypted; - - /* AAD | msg_plaintext.sg.data | sg_tag */ - struct scatterlist sg_aead_in[2]; - /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */ - struct scatterlist sg_aead_out[2]; - - char content_type; - struct scatterlist sg_content_type; - - char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[MAX_IV_SIZE]; - struct aead_request aead_req; - u8 aead_req_ctx[]; -}; - -struct tls_msg { - struct strp_msg rxm; - u8 control; -}; - struct tx_work { struct delayed_work work; struct sock *sk; @@ -144,21 +118,38 @@ struct tls_sw_context_tx { unsigned long tx_bitmask; }; +struct tls_strparser { + struct sock *sk; + + u32 mark : 8; + u32 stopped : 1; + u32 copy_mode : 1; + u32 msg_ready : 1; + + struct strp_msg stm; + + struct sk_buff *anchor; + struct work_struct work; +}; + struct tls_sw_context_rx { struct crypto_aead *aead_recv; struct crypto_wait async_wait; - struct strparser strp; struct sk_buff_head rx_list; /* list of decrypted 'data' records */ void (*saved_data_ready)(struct sock *sk); - struct sk_buff *recv_pkt; - u8 control; + u8 reader_present; u8 async_capable:1; - u8 decrypted:1; + u8 zc_capable:1; + u8 reader_contended:1; + + struct tls_strparser strp; + atomic_t decrypt_pending; /* protect 
crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; - bool async_notify; + struct sk_buff_head async_hold; + struct wait_queue_head wq; }; struct tls_record_info { @@ -180,6 +171,8 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); + struct work_struct destruct_work; + struct tls_context *ctx; u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough @@ -245,6 +238,8 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; + u8 zerocopy_sendfile:1; + u8 rx_no_pad:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -252,7 +247,7 @@ struct tls_context { void *priv_ctx_tx; void *priv_ctx_rx; - struct net_device *netdev; + struct net_device __rcu *netdev; /* rw cache line */ struct cipher_context tx; @@ -352,43 +347,6 @@ struct tls_offload_context_rx { #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) -struct tls_context *tls_ctx_create(struct sock *sk); -void tls_ctx_free(struct sock *sk, struct tls_context *ctx); -void update_sk_prot(struct sock *sk, struct tls_context *ctx); - -int wait_on_pending_writer(struct sock *sk, long *timeo); -int tls_sk_query(struct sock *sk, int optname, char __user *optval, - int __user *optlen); -int tls_sk_attach(struct sock *sk, int optname, char __user *optval, - unsigned int optlen); -void tls_err_abort(struct sock *sk, int err); - -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); -void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); -void tls_sw_strparser_done(struct tls_context *tls_ctx); -int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, 
size_t size, int flags); -void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); -void tls_sw_release_resources_tx(struct sock *sk); -void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); -void tls_sw_free_resources_rx(struct sock *sk); -void tls_sw_release_resources_rx(struct sock *sk); -void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); -int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len); -bool tls_sw_sock_is_readable(struct sock *sk); -ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, unsigned int flags); - -int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); -int tls_tx_records(struct sock *sk, int flags); - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); @@ -402,56 +360,6 @@ static inline u32 tls_record_start_seq(struct tls_record_info *rec) return rec->end_seq - rec->len; } -int tls_push_sg(struct sock *sk, struct tls_context *ctx, - struct scatterlist *sg, u16 first_offset, - int flags); -int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, - int flags); -void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); - -static inline struct tls_msg *tls_msg(struct sk_buff *skb) -{ - return (struct tls_msg *)strp_msg(skb); -} - -static inline bool tls_is_partially_sent_record(struct tls_context *ctx) -{ - return !!ctx->partially_sent_record; -} - -static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) -{ - return tls_ctx->pending_open_record_frags; -} - -static inline bool is_tx_ready(struct tls_sw_context_tx *ctx) -{ - struct tls_rec *rec; - - rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (!rec) - return false; - - return READ_ONCE(rec->tx_ready); -} - -static inline u16 tls_user_config(struct 
tls_context *ctx, bool tx) -{ - u16 config = tx ? ctx->tx_conf : ctx->rx_conf; - - switch (config) { - case TLS_BASE: - return TLS_CONF_BASE; - case TLS_SW: - return TLS_CONF_SW; - case TLS_HW: - return TLS_CONF_HW; - case TLS_HW_RECORD: - return TLS_CONF_HW_RECORD; - } - return 0; -} - struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); @@ -470,31 +378,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline bool tls_bigint_increment(unsigned char *seq, int len) -{ - int i; - - for (i = len - 1; i >= 0; i--) { - ++seq[i]; - if (seq[i] != 0) - break; - } - - return (i == -1); -} - -static inline void tls_bigint_subtract(unsigned char *seq, int n) -{ - u64 rcd_sn; - __be64 *p; - - BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8); - - p = (__be64 *)seq; - rcd_sn = be64_to_cpu(*p); - *p = cpu_to_be64(rcd_sn - n); -} - static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -505,82 +388,6 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) return (__force void *)icsk->icsk_ulp_data; } -static inline void tls_advance_record_sn(struct sock *sk, - struct tls_prot_info *prot, - struct cipher_context *ctx) -{ - if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, -EBADMSG); - - if (prot->version != TLS_1_3_VERSION && - prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) - tls_bigint_increment(ctx->iv + prot->salt_size, - prot->iv_size); -} - -static inline void tls_fill_prepend(struct tls_context *ctx, - char *buf, - size_t plaintext_len, - unsigned char record_type) -{ - struct tls_prot_info *prot = &ctx->prot_info; - size_t pkt_len, iv_size = prot->iv_size; - - pkt_len = plaintext_len + prot->tag_size; - if (prot->version != TLS_1_3_VERSION && - prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) { - pkt_len += iv_size; - - memcpy(buf + TLS_NONCE_OFFSET, - ctx->tx.iv + 
prot->salt_size, iv_size); - } - - /* we cover nonce explicit here as well, so buf should be of - * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE - */ - buf[0] = prot->version == TLS_1_3_VERSION ? - TLS_RECORD_TYPE_DATA : record_type; - /* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */ - buf[1] = TLS_1_2_VERSION_MINOR; - buf[2] = TLS_1_2_VERSION_MAJOR; - /* we can use IV for nonce explicit according to spec */ - buf[3] = pkt_len >> 8; - buf[4] = pkt_len & 0xFF; -} - -static inline void tls_make_aad(char *buf, - size_t size, - char *record_sequence, - unsigned char record_type, - struct tls_prot_info *prot) -{ - if (prot->version != TLS_1_3_VERSION) { - memcpy(buf, record_sequence, prot->rec_seq_size); - buf += 8; - } else { - size += prot->tag_size; - } - - buf[0] = prot->version == TLS_1_3_VERSION ? - TLS_RECORD_TYPE_DATA : record_type; - buf[1] = TLS_1_2_VERSION_MAJOR; - buf[2] = TLS_1_2_VERSION_MINOR; - buf[3] = size >> 8; - buf[4] = size & 0xFF; -} - -static inline void xor_iv_with_seq(struct tls_prot_info *prot, char *iv, char *seq) -{ - int i; - - if (prot->version == TLS_1_3_VERSION || - prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) { - for (i = 0; i < 8; i++) - iv[i + 4] ^= seq[i]; - } -} - - static inline struct tls_sw_context_rx *tls_sw_ctx_rx( const struct tls_context *tls_ctx) { @@ -617,16 +424,12 @@ static inline bool tls_sw_has_ctx_rx(const struct sock *sk) return !!tls_sw_ctx_rx(ctx); } -void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); -void tls_device_write_space(struct sock *sk, struct tls_context *ctx); - static inline struct tls_offload_context_rx * tls_offload_ctx_rx(const struct tls_context *tls_ctx) { return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } -#if IS_ENABLED(CONFIG_TLS_DEVICE) static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, enum tls_offload_ctx_dir direction) { @@ -641,7 +444,6 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir 
direction) { return __tls_driver_ctx(tls_get_ctx(sk), direction); } -#endif #define RESYNC_REQ BIT(0) #define RESYNC_REQ_ASYNC BIT(1) @@ -696,31 +498,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk) return ret; } -int __net_init tls_proc_init(struct net *net); -void __net_exit tls_proc_fini(struct net *net); - -int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, - unsigned char *record_type); -int decrypt_skb(struct sock *sk, struct sk_buff *skb, - struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); -int tls_sw_fallback_init(struct sock *sk, - struct tls_offload_context_tx *offload_ctx, - struct tls_crypto_info *crypto_info); - #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); -void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); -void tls_device_free_resources_tx(struct sock *sk); -int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); -void tls_device_offload_cleanup_rx(struct sock *sk); -void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq); -int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, - struct sk_buff *skb, struct strp_msg *rxm); static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) { @@ -729,33 +511,5 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return false; return tls_get_ctx(sk)->rx_conf == TLS_HW; } -#else -static inline void tls_device_init(void) {} -static inline void tls_device_cleanup(void) {} - -static inline int -tls_set_device_offload(struct sock *sk, struct tls_context *ctx) -{ - return -EOPNOTSUPP; -} - -static inline void tls_device_free_resources_tx(struct sock *sk) {} - -static inline int -tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) -{ - return -EOPNOTSUPP; -} - -static inline void 
tls_device_offload_cleanup_rx(struct sock *sk) {} -static inline void -tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {} - -static inline int -tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, - struct sk_buff *skb, struct strp_msg *rxm) -{ - return 0; -} #endif #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index da06613c9603..b830463e3dff 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -3,6 +3,7 @@ #define _TRANSP_V6_H #include <net/checksum.h> +#include <net/sock.h> /* IPv6 transport protocols */ extern struct proto rawv6_prot; @@ -12,6 +13,7 @@ extern struct proto tcpv6_prot; extern struct proto pingv6_prot; struct flowi6; +struct ipcm6_cookie; /* extension headers */ int ipv6_exthdrs_init(void); diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h index 2ea3deba4c99..7b0de7852908 100644 --- a/include/net/tun_proto.h +++ b/include/net/tun_proto.h @@ -1,7 +1,8 @@ #ifndef __NET_TUN_PROTO_H #define __NET_TUN_PROTO_H -#include <linux/kernel.h> +#include <linux/if_ether.h> +#include <linux/types.h> /* One byte protocol values as defined by VXLAN-GPE and NSH. These will * hopefully get a shared IANA registry. 
diff --git a/include/net/udp.h b/include/net/udp.h index 909ecf447e0f..fee053bcd17c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -95,6 +95,7 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; +DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; @@ -167,36 +168,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); -struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, - struct udphdr *uh, struct sock *sk); -int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); - struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); -static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) -{ - struct udphdr *uh; - unsigned int hlen, off; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) - uh = skb_gro_header_slow(skb, hlen, off); - - return uh; -} - /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { @@ -262,7 +239,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); @@ -270,18 +247,18 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, } /* net/ipv4/udp.c */ -void udp_destruct_sock(struct sock *sk); +void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, + int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_udp(sk, flags, noblock, &off, err); + return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); @@ -329,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index afc7ce713657..72394f441dad 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net, typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); +typedef void 
(*udp_tunnel_encap_err_rcv_t)(struct sock *sk, + struct sk_buff *skb, + unsigned int udp_offset); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, @@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg { __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_err_lookup_t encap_err_lookup; + udp_tunnel_encap_err_rcv_t encap_err_rcv; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; diff --git a/include/net/udplite.h b/include/net/udplite.h index 9185e45b997f..299c14ce2bb9 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -6,6 +6,7 @@ #define _UDPLITE_H #include <net/ip6_checksum.h> +#include <net/udp.h> /* UDP-Lite socket options */ #define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ @@ -24,14 +25,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT; } -/* Designate sk as UDP-Lite socket */ -static inline int udplite_sk_init(struct sock *sk) -{ - udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; - return 0; -} - /* * Checksumming routines */ @@ -70,49 +63,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -/* Slow-path computation of checksum. Socket is locked. */ -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - const struct udp_sock *up = udp_sk(skb->sk); - int cscov = up->len; - __wsum csum = 0; - - if (up->pcflag & UDPLITE_SEND_CC) { - /* - * Sender has set `partial coverage' option on UDP-Lite socket. - * The special case "up->pcslen == 0" signifies full coverage. 
- */ - if (up->pcslen < up->len) { - if (0 < up->pcslen) - cscov = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); - } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ - } - - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - skb_queue_walk(&sk->sk_write_queue, skb) { - const int off = skb_transport_offset(skb); - const int len = skb->len - off; - - csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); - - if ((cscov -= len) <= 0) - break; - } - return csum; -} - /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 08537aa14f7c..bca5b01af247 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -10,6 +10,7 @@ #include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 +#define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -226,11 +227,56 @@ struct vxlan_config { enum ifla_vxlan_df df; }; +enum { + VXLAN_VNI_STATS_RX, + VXLAN_VNI_STATS_RX_DROPS, + VXLAN_VNI_STATS_RX_ERRORS, + VXLAN_VNI_STATS_TX, + VXLAN_VNI_STATS_TX_DROPS, + VXLAN_VNI_STATS_TX_ERRORS, +}; + +struct vxlan_vni_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + u64 tx_errors; +}; + +struct vxlan_vni_stats_pcpu { + struct vxlan_vni_stats stats; + struct u64_stats_sync syncp; +}; + struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; }; +struct vxlan_vni_node { + struct rhash_head vnode; + 
struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif + struct list_head vlist; + __be32 vni; + union vxlan_addr remote_ip; /* default remote ip for this vni */ + struct vxlan_vni_stats_pcpu __percpu *stats; + + struct rcu_head rcu; +}; + +struct vxlan_vni_group { + struct rhashtable vni_hash; + struct list_head vni_list; + u32 num_vnis; +}; + /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ @@ -253,6 +299,8 @@ struct vxlan_dev { struct vxlan_config cfg; + struct vxlan_vni_group __rcu *vnigrp; + struct hlist_head fdb_head[FDB_HASH_SIZE]; }; @@ -273,6 +321,7 @@ struct vxlan_dev { #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 +#define VXLAN_F_VNIFILTER 0x20000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -282,7 +331,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. 
*/ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ @@ -291,7 +341,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ - VXLAN_F_COLLECT_METADATA) + VXLAN_F_COLLECT_METADATA | \ + VXLAN_F_VNIFILTER) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/net/xdp.h b/include/net/xdp.h index 447f9b1578f3..55dbc68bfffc 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -60,12 +60,20 @@ struct xdp_rxq_info { u32 reg_state; struct xdp_mem_info mem; unsigned int napi_id; + u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { struct net_device *dev; }; +enum xdp_buff_flags { + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under + * pressure + */ +}; + struct xdp_buff { void *data; void *data_end; @@ -74,13 +82,40 @@ struct xdp_buff { struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { xdp->frame_sz = frame_sz; xdp->rxq = rxq; + 
xdp->flags = 0; } static __always_inline void @@ -111,19 +146,44 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +{ + unsigned int len = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + len += sinfo->xdp_frags_size; +out: + return len; +} + struct xdp_frame { void *data; u16 len; u16 headroom; - u32 metasize:8; - u32 frame_sz:24; + u32 metasize; /* uses lower 8-bits */ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, * while mem info is valid on remote CPU. */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 frame_sz; + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +static inline void +xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + bool pfmemalloc) +{ + skb_shinfo(skb)->nr_frags = nr_frags; + + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; + skb->pfmemalloc |= pfmemalloc; +} + /* Avoids inlining WARN macro in fast-path */ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) @@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->data_end = frame->data + frame->len; xdp->data_meta = frame->data - frame->metasize; 
xdp->frame_sz = frame->frame_sz; + xdp->flags = frame->flags; } static inline @@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp, xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + xdp_frame->flags = xdp->flags; return 0; } @@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); @@ -246,20 +323,60 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem); static inline void xdp_release_frame(struct xdp_frame *xdpf) { struct xdp_mem_info *mem = &xdpf->mem; + struct skb_shared_info *sinfo; + int i; /* Curr only page_pool needs this */ - if (mem->type == MEM_TYPE_PAGE_POOL) - __xdp_release_frame(xdpf->data, mem); + if (mem->type != MEM_TYPE_PAGE_POOL) + return; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_release_frame(page_address(page), mem); + } +out: + __xdp_release_frame(xdpf->data, mem); +} + +static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) +{ + struct skb_shared_info *sinfo; + unsigned int len = xdpf->len; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + len += sinfo->xdp_frags_size; +out: + return len; +} + +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size); +static inline int +xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id) +{ + return __xdp_rxq_info_reg(xdp_rxq, dev, 
queue_index, napi_id, 0); } -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); +int xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, void *allocator); +void xdp_unreg_mem_model(struct xdp_mem_info *mem); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a9d5b7603b89..c9df68d5f258 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -3,6 +3,7 @@ #define __LINUX_NET_XDP_PRIV_H__ #include <linux/rhashtable.h> +#include <net/xdp.h> /* Private to net/core/xdp.c, but used by trace/events/xdp.h */ struct xdp_mem_allocator { @@ -10,7 +11,6 @@ struct xdp_mem_allocator { union { void *allocator; struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; }; struct rhash_head node; struct rcu_head rcu; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index fff069d2ed1b..3057e1a4a11c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -6,6 +6,7 @@ #ifndef _LINUX_XDP_SOCK_H #define _LINUX_XDP_SOCK_H +#include <linux/bpf.h> #include <linux/workqueue.h> #include <linux/if_xdp.h> #include <linux/mutex.h> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 443d45951564..9c0d860609ba 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -9,11 +9,14 @@ #include <net/xdp_sock.h> #include <net/xsk_buff_pool.h> +#define XDP_UMEM_MIN_CHUNK_SHIFT 11 +#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) + #ifdef CONFIG_XDP_SOCKETS 
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); @@ -44,6 +47,15 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, xp_set_rxq_info(pool, rxq); } +static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + return pool->heads[0].xdp.rxq->napi_id; +#else + return 0; +#endif +} + static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { @@ -142,8 +154,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, return false; } -static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, - u32 max) +static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } @@ -199,6 +210,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, { } +static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { @@ -239,6 +255,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } +static inline void xsk_buff_discard(struct xdp_buff *xdp) +{ +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..dbc81f5eb553 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -126,12 +126,17 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; -struct xfrm_state_offload { +enum { + XFRM_DEV_OFFLOAD_IN = 1, + XFRM_DEV_OFFLOAD_OUT, +}; + +struct xfrm_dev_offload { 
struct net_device *dev; + netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; - unsigned int num_exthdrs; - u8 flags; + u8 dir : 2; }; struct xfrm_mode { @@ -200,6 +205,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; @@ -241,7 +251,7 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct hrtimer mtimer; - struct xfrm_state_offload xso; + struct xfrm_dev_offload xso; /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -302,9 +312,15 @@ struct km_event { struct net *net; }; +struct xfrm_if_decode_session_result { + struct net *net; + u32 if_id; +}; + struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb, - unsigned short family); + bool (*decode_session)(struct sk_buff *skb, + unsigned short family, + struct xfrm_if_decode_session_result *res); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); @@ -389,7 +405,8 @@ struct xfrm_type { #define XFRM_TYPE_LOCAL_COADDR 4 #define XFRM_TYPE_REMOTE_COADDR 8 - int (*init_state)(struct xfrm_state *x); + int (*init_state)(struct xfrm_state *x, + struct netlink_ext_ack *extack); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); @@ -573,8 +590,8 @@ struct xfrm_mgr { bool (*is_alive)(const struct km_event *c); }; -int xfrm_register_km(struct xfrm_mgr *km); -int xfrm_unregister_km(struct xfrm_mgr *km); +void xfrm_register_km(struct xfrm_mgr *km); +void xfrm_unregister_km(struct xfrm_mgr *km); struct xfrm_tunnel_skb_cb { union { @@ -975,6 +992,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 
interface */ u32 if_id; /* interface identifyer */ + bool collect_md; }; struct xfrm_if { @@ -1000,7 +1018,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 -#define XFRM_ESP_NO_TRAILER 64 +/* 64 is free */ #define XFRM_DEV_RESUME 128 #define XFRM_XMIT 256 @@ -1075,24 +1093,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM -static inline bool -xfrm_default_allow(struct net *net, int dir) -{ - u8 def = net->xfrm.policy_default; - - switch (dir) { - case XFRM_POLICY_IN: - return def & XFRM_POL_DEFAULT_IN ? false : true; - case XFRM_POLICY_OUT: - return def & XFRM_POL_DEFAULT_OUT ? false : true; - case XFRM_POLICY_FWD: - return def & XFRM_POL_DEFAULT_FWD ? false : true; - } +int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, + unsigned short family); + +static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, + int dir) +{ + if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) + return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; + return false; } -int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, - unsigned short family); +static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, + int dir, unsigned short family) +{ + if (dir != XFRM_POLICY_OUT && family == AF_INET) { + /* same dst may be used for traffic originating from + * devices with different policy settings. 
+ */ + return IPCB(skb)->flags & IPSKB_NOPOLICY; + } + return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); +} static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, @@ -1104,13 +1127,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - if (xfrm_default_allow(net, dir)) - return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); - else - return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); + return __xfrm_check_nopolicy(net, skb, dir) || + __xfrm_check_dev_nopolicy(skb, dir, family) || + __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) @@ -1162,13 +1181,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_FWD)) - return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); - else - return (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); + if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && + net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) + return true; + + return (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) @@ -1185,6 +1203,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { + if (!sk_fullsock(osk)) + return 0; sk->sk_policy[0] = NULL; sk->sk_policy[1] = NULL; if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) @@ -1561,10 +1581,10 @@ 
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); -int xfrm_init_replay(struct xfrm_state *x); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); +int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, + struct netlink_ext_ack *extack); int xfrm_init_state(struct xfrm_state *x); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); @@ -1675,14 +1695,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); @@ -1867,12 +1888,13 @@ void xfrm_dev_resume(struct sk_buff *skb); void xfrm_dev_backlog(struct softnet_data *sd); struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, - struct xfrm_user_offload 
*xuo); + struct xfrm_user_offload *xuo, + struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); @@ -1898,7 +1920,7 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) static inline void xfrm_dev_state_delete(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev) xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); @@ -1906,14 +1928,14 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x) static inline void xfrm_dev_state_free(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put(dev); + netdev_put(dev, &xso->dev_tracker); } } #else @@ -1930,7 +1952,7 @@ static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_fea return skb; } -static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index ddeefc4a1040..f787c3f524b0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -60,6 +60,7 @@ struct xsk_buff_pool { */ dma_addr_t *dma_pages; struct xdp_buff_xsk *heads; + struct xdp_desc *tx_descs; u64 chunk_mask; u64 addrs_cnt; u32 free_list_cnt; @@ -94,8 +95,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock 
*xs, struct xdp_umem *umem); int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); -int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id); +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); |